From 98427aa6f34682d4a46fd41390ab7066adc11463 Mon Sep 17 00:00:00 2001 From: Antonio Valentino Date: Wed, 8 Jan 2025 00:32:06 +0000 Subject: [PATCH] Import pytables_3.10.2.orig.tar.gz [dgit import orig pytables_3.10.2.orig.tar.gz] --- ANNOUNCE.txt | 79 + LICENSE.txt | 35 + LICENSES/BLOSC.txt | 22 + LICENSES/CLOUD-SPTHEME.txt | 47 + LICENSES/FASTLZ.txt | 23 + LICENSES/H5PY.txt | 33 + LICENSES/HDF5.txt | 69 + LICENSES/LZ4.txt | 31 + LICENSES/SNAPPY.txt | 28 + LICENSES/STDINT.txt | 26 + LICENSES/WIN32PTHREADS.txt | 19 + LICENSES/ZLIB.txt | 22 + LICENSES/ZSTD.TXT | 30 + MANIFEST.in | 30 + Makefile | 82 + PKG-INFO | 47 + README.rst | 135 + THANKS | 85 + bench/100-trillion-baby.py | 87 + bench/LRU-experiments.py | 100 + bench/LRU-experiments2.py | 59 + bench/LRUcache-node-bench.py | 83 + bench/b2nd_compare_getslice.py | 284 + bench/b2nd_compare_getslice_plot.py | 441 + bench/bench-postgres-ranges.sh | 13 + bench/bench-pytables-ranges.sh | 30 + bench/bench-pytables.sh | 28 + bench/blosc.py | 167 + bench/bsddb-table-bench.py | 262 + bench/cacheout.py | 13 + bench/chunkshape-bench.py | 65 + bench/chunkshape-testing.py | 134 + bench/collations.py | 133 + bench/copy-bench.py | 35 + bench/create-large-number-objects.py | 45 + bench/deep-tree-h5py.py | 115 + bench/deep-tree.py | 130 + bench/evaluate.py | 187 + bench/expression.py | 184 + bench/fetch_meteo_data.py | 119 + bench/get-figures-ranges.py | 259 + bench/get-figures.py | 318 + bench/indexed_search.py | 509 + bench/keysort.py | 45 + bench/lookup_bench.py | 257 + bench/open_close-bench.py | 236 + bench/opteron-stress-test.txt | 63 + bench/optimal-chunksize.py | 134 + bench/plot-bar.py | 134 + bench/plot-comparison-lzo-zlib-ucl.gnuplot | 26 + bench/plot-comparison-psyco-lzo.gnuplot | 27 + bench/poly.py | 199 + bench/postgres-search-bench.py | 258 + bench/postgres_backend.py | 171 + bench/pytables-search-bench.py | 235 + bench/pytables_backend.py | 222 + bench/query_meteo_data.py | 88 + bench/read-table.py | 57 + bench/recarray2-test.py | 113 + bench/search-bench-plot.py | 172 + bench/search-bench-rnd.sh | 122 + bench/search-bench.py | 555 + bench/search-bench.sh | 123 + bench/searchsorted-bench.py | 362 + bench/searchsorted-bench2.py | 362 + bench/shelve-bench.py | 214 + bench/simple-table.py | 55 + bench/split-file.py | 39 + bench/sqlite-search-bench-rnd.sh | 105 + bench/sqlite-search-bench.py | 466 + bench/sqlite-search-bench.sh | 96 + bench/sqlite3-search-bench.py | 199 + bench/stress-test.py | 451 + bench/stress-test2.py | 255 + bench/stress-test3.py | 243 + bench/table-bench.py | 431 + bench/table-copy.py | 119 + bench/undo_redo.py | 245 + bench/undo_redo.txt | 103 + bench/widetree.py | 138 + bench/widetree2.py | 122 + bench/woody-pentiumIV.txt | 188 + c-blosc/blosc/bitshuffle-avx2.c | 258 + c-blosc/blosc/bitshuffle-avx2.h | 38 + c-blosc/blosc/bitshuffle-generic.c | 220 + c-blosc/blosc/bitshuffle-generic.h | 161 + c-blosc/blosc/bitshuffle-sse2.c | 480 + c-blosc/blosc/bitshuffle-sse2.h | 52 + c-blosc/blosc/blosc-common.h | 77 + c-blosc/blosc/blosc-comp-features.h | 21 + c-blosc/blosc/blosc-export.h | 45 + c-blosc/blosc/blosc.c | 2307 ++++ c-blosc/blosc/blosc.h | 535 + c-blosc/blosc/blosclz.c | 789 ++ c-blosc/blosc/blosclz.h | 69 + c-blosc/blosc/fastcopy.c | 639 ++ c-blosc/blosc/fastcopy.h | 19 + c-blosc/blosc/shuffle-avx2.c | 772 ++ c-blosc/blosc/shuffle-avx2.h | 36 + c-blosc/blosc/shuffle-generic.c | 25 + c-blosc/blosc/shuffle-generic.h | 99 + c-blosc/blosc/shuffle-sse2.c | 640 ++ c-blosc/blosc/shuffle-sse2.h | 36 + c-blosc/blosc/shuffle.c | 
453 + c-blosc/blosc/shuffle.h | 67 + c-blosc/blosc/win32/pthread.c | 218 + c-blosc/blosc/win32/pthread.h | 112 + c-blosc/blosc/win32/stdint-windows.h | 259 + c-blosc/internal-complibs/lz4-1.9.4/lz4.c | 2722 +++++ c-blosc/internal-complibs/lz4-1.9.4/lz4.h | 842 ++ c-blosc/internal-complibs/lz4-1.9.4/lz4hc.c | 1631 +++ c-blosc/internal-complibs/lz4-1.9.4/lz4hc.h | 413 + .../internal-complibs/zlib-1.3.1/adler32.c | 164 + .../internal-complibs/zlib-1.3.1/compress.c | 75 + .../zlib-1.3.1/contrib/blast/blast.c | 466 + .../zlib-1.3.1/contrib/blast/blast.h | 83 + .../zlib-1.3.1/contrib/infback9/infback9.c | 603 ++ .../zlib-1.3.1/contrib/infback9/infback9.h | 37 + .../zlib-1.3.1/contrib/infback9/inffix9.h | 107 + .../zlib-1.3.1/contrib/infback9/inflate9.h | 47 + .../zlib-1.3.1/contrib/infback9/inftree9.c | 319 + .../zlib-1.3.1/contrib/infback9/inftree9.h | 61 + .../zlib-1.3.1/contrib/iostream/zfstream.h | 128 + .../zlib-1.3.1/contrib/iostream2/zstream.h | 307 + .../zlib-1.3.1/contrib/iostream3/test.cc | 50 + .../zlib-1.3.1/contrib/iostream3/zfstream.cc | 479 + .../zlib-1.3.1/contrib/iostream3/zfstream.h | 466 + .../zlib-1.3.1/contrib/minizip/crypt.h | 128 + .../zlib-1.3.1/contrib/minizip/ioapi.c | 231 + .../zlib-1.3.1/contrib/minizip/ioapi.h | 210 + .../zlib-1.3.1/contrib/minizip/iowin32.c | 440 + .../zlib-1.3.1/contrib/minizip/iowin32.h | 28 + .../zlib-1.3.1/contrib/minizip/miniunz.c | 647 ++ .../zlib-1.3.1/contrib/minizip/minizip.c | 509 + .../zlib-1.3.1/contrib/minizip/mztools.c | 285 + .../zlib-1.3.1/contrib/minizip/mztools.h | 37 + .../zlib-1.3.1/contrib/minizip/unzip.c | 1985 ++++ .../zlib-1.3.1/contrib/minizip/unzip.h | 437 + .../zlib-1.3.1/contrib/minizip/zip.c | 1956 ++++ .../zlib-1.3.1/contrib/minizip/zip.h | 364 + .../zlib-1.3.1/contrib/puff/puff.c | 840 ++ .../zlib-1.3.1/contrib/puff/puff.h | 35 + .../zlib-1.3.1/contrib/puff/pufftest.c | 165 + .../zlib-1.3.1/contrib/testzlib/testzlib.c | 275 + .../zlib-1.3.1/contrib/untgz/untgz.c | 667 ++ c-blosc/internal-complibs/zlib-1.3.1/crc32.c | 1049 ++ c-blosc/internal-complibs/zlib-1.3.1/crc32.h | 9446 +++++++++++++++++ .../internal-complibs/zlib-1.3.1/deflate.c | 2139 ++++ .../internal-complibs/zlib-1.3.1/deflate.h | 377 + .../zlib-1.3.1/examples/enough.c | 597 ++ .../zlib-1.3.1/examples/fitblk.c | 233 + .../zlib-1.3.1/examples/gun.c | 702 ++ .../zlib-1.3.1/examples/gzappend.c | 504 + .../zlib-1.3.1/examples/gzjoin.c | 449 + .../zlib-1.3.1/examples/gzlog.c | 1061 ++ .../zlib-1.3.1/examples/gzlog.h | 91 + .../zlib-1.3.1/examples/gznorm.c | 470 + .../zlib-1.3.1/examples/zpipe.c | 205 + .../zlib-1.3.1/examples/zran.c | 533 + .../zlib-1.3.1/examples/zran.h | 51 + .../internal-complibs/zlib-1.3.1/gzclose.c | 23 + c-blosc/internal-complibs/zlib-1.3.1/gzguts.h | 218 + c-blosc/internal-complibs/zlib-1.3.1/gzlib.c | 582 + c-blosc/internal-complibs/zlib-1.3.1/gzread.c | 602 ++ .../internal-complibs/zlib-1.3.1/gzwrite.c | 631 ++ .../internal-complibs/zlib-1.3.1/infback.c | 628 ++ .../internal-complibs/zlib-1.3.1/inffast.c | 320 + .../internal-complibs/zlib-1.3.1/inffast.h | 11 + .../internal-complibs/zlib-1.3.1/inffixed.h | 94 + .../internal-complibs/zlib-1.3.1/inflate.c | 1526 +++ .../internal-complibs/zlib-1.3.1/inflate.h | 126 + .../internal-complibs/zlib-1.3.1/inftrees.c | 299 + .../internal-complibs/zlib-1.3.1/inftrees.h | 62 + .../zlib-1.3.1/test/example.c | 546 + .../zlib-1.3.1/test/infcover.c | 672 ++ .../zlib-1.3.1/test/minigzip.c | 579 + c-blosc/internal-complibs/zlib-1.3.1/trees.c | 1117 ++ c-blosc/internal-complibs/zlib-1.3.1/trees.h | 128 + 
.../internal-complibs/zlib-1.3.1/uncompr.c | 85 + c-blosc/internal-complibs/zlib-1.3.1/zconf.h | 543 + c-blosc/internal-complibs/zlib-1.3.1/zlib.h | 1938 ++++ c-blosc/internal-complibs/zlib-1.3.1/zutil.c | 299 + c-blosc/internal-complibs/zlib-1.3.1/zutil.h | 254 + .../zstd-1.5.6/common/allocations.h | 55 + .../zstd-1.5.6/common/bits.h | 200 + .../zstd-1.5.6/common/bitstream.h | 457 + .../zstd-1.5.6/common/compiler.h | 450 + .../internal-complibs/zstd-1.5.6/common/cpu.h | 249 + .../zstd-1.5.6/common/debug.c | 30 + .../zstd-1.5.6/common/debug.h | 116 + .../zstd-1.5.6/common/entropy_common.c | 340 + .../zstd-1.5.6/common/error_private.c | 63 + .../zstd-1.5.6/common/error_private.h | 168 + .../internal-complibs/zstd-1.5.6/common/fse.h | 640 ++ .../zstd-1.5.6/common/fse_decompress.c | 313 + .../internal-complibs/zstd-1.5.6/common/huf.h | 286 + .../internal-complibs/zstd-1.5.6/common/mem.h | 426 + .../zstd-1.5.6/common/pool.c | 371 + .../zstd-1.5.6/common/pool.h | 90 + .../zstd-1.5.6/common/portability_macros.h | 158 + .../zstd-1.5.6/common/threading.c | 182 + .../zstd-1.5.6/common/threading.h | 150 + .../zstd-1.5.6/common/xxhash.c | 18 + .../zstd-1.5.6/common/xxhash.h | 7020 ++++++++++++ .../zstd-1.5.6/common/zstd_common.c | 48 + .../zstd-1.5.6/common/zstd_deps.h | 111 + .../zstd-1.5.6/common/zstd_internal.h | 392 + .../zstd-1.5.6/common/zstd_trace.h | 163 + .../zstd-1.5.6/compress/clevels.h | 134 + .../zstd-1.5.6/compress/fse_compress.c | 625 ++ .../zstd-1.5.6/compress/hist.c | 181 + .../zstd-1.5.6/compress/hist.h | 75 + .../zstd-1.5.6/compress/huf_compress.c | 1464 +++ .../zstd-1.5.6/compress/zstd_compress.c | 7153 +++++++++++++ .../compress/zstd_compress_internal.h | 1534 +++ .../compress/zstd_compress_literals.c | 235 + .../compress/zstd_compress_literals.h | 39 + .../compress/zstd_compress_sequences.c | 442 + .../compress/zstd_compress_sequences.h | 54 + .../compress/zstd_compress_superblock.c | 688 ++ .../compress/zstd_compress_superblock.h | 32 + .../zstd-1.5.6/compress/zstd_cwksp.h | 748 ++ .../zstd-1.5.6/compress/zstd_double_fast.c | 770 ++ .../zstd-1.5.6/compress/zstd_double_fast.h | 50 + .../zstd-1.5.6/compress/zstd_fast.c | 968 ++ .../zstd-1.5.6/compress/zstd_fast.h | 38 + .../zstd-1.5.6/compress/zstd_lazy.c | 2199 ++++ .../zstd-1.5.6/compress/zstd_lazy.h | 202 + .../zstd-1.5.6/compress/zstd_ldm.c | 730 ++ .../zstd-1.5.6/compress/zstd_ldm.h | 117 + .../zstd-1.5.6/compress/zstd_ldm_geartab.h | 106 + .../zstd-1.5.6/compress/zstd_opt.c | 1576 +++ .../zstd-1.5.6/compress/zstd_opt.h | 80 + .../zstd-1.5.6/compress/zstdmt_compress.c | 1882 ++++ .../zstd-1.5.6/compress/zstdmt_compress.h | 113 + .../zstd-1.5.6/decompress/huf_decompress.c | 1944 ++++ .../zstd-1.5.6/decompress/zstd_ddict.c | 244 + .../zstd-1.5.6/decompress/zstd_ddict.h | 44 + .../zstd-1.5.6/decompress/zstd_decompress.c | 2407 +++++ .../decompress/zstd_decompress_block.c | 2215 ++++ .../decompress/zstd_decompress_block.h | 73 + .../decompress/zstd_decompress_internal.h | 240 + .../zstd-1.5.6/deprecated/zbuff.h | 214 + .../zstd-1.5.6/deprecated/zbuff_common.c | 26 + .../zstd-1.5.6/deprecated/zbuff_compress.c | 167 + .../zstd-1.5.6/deprecated/zbuff_decompress.c | 77 + .../zstd-1.5.6/dictBuilder/cover.c | 1261 +++ .../zstd-1.5.6/dictBuilder/cover.h | 152 + .../zstd-1.5.6/dictBuilder/divsufsort.c | 1913 ++++ .../zstd-1.5.6/dictBuilder/divsufsort.h | 67 + .../zstd-1.5.6/dictBuilder/fastcover.c | 766 ++ .../zstd-1.5.6/dictBuilder/zdict.c | 1133 ++ .../zstd-1.5.6/legacy/zstd_legacy.h | 452 + .../zstd-1.5.6/legacy/zstd_v01.c | 2127 
++++ .../zstd-1.5.6/legacy/zstd_v01.h | 94 + .../zstd-1.5.6/legacy/zstd_v02.c | 3465 ++++++ .../zstd-1.5.6/legacy/zstd_v02.h | 93 + .../zstd-1.5.6/legacy/zstd_v03.c | 3105 ++++++ .../zstd-1.5.6/legacy/zstd_v03.h | 93 + .../zstd-1.5.6/legacy/zstd_v04.c | 3598 +++++++ .../zstd-1.5.6/legacy/zstd_v04.h | 142 + .../zstd-1.5.6/legacy/zstd_v05.c | 4005 +++++++ .../zstd-1.5.6/legacy/zstd_v05.h | 162 + .../zstd-1.5.6/legacy/zstd_v06.c | 4106 +++++++ .../zstd-1.5.6/legacy/zstd_v06.h | 172 + .../zstd-1.5.6/legacy/zstd_v07.c | 4490 ++++++++ .../zstd-1.5.6/legacy/zstd_v07.h | 187 + c-blosc/internal-complibs/zstd-1.5.6/zdict.h | 474 + c-blosc/internal-complibs/zstd-1.5.6/zstd.h | 3089 ++++++ .../zstd-1.5.6/zstd_errors.h | 114 + contrib/README | 10 + contrib/make_hdf.py | 355 + contrib/nctoh5.py | 63 + doc/Makefile | 20 + doc/make.bat | 35 + doc/scripts/filenode.py | 45 + doc/scripts/pickletrouble.py | 27 + doc/scripts/tutorial1.py | 304 + doc/source/FAQ.rst | 519 + doc/source/MIGRATING_TO_2.x.rst | 268 + doc/source/MIGRATING_TO_3.x.rst | 581 + doc/source/_static/logo-pytables-small.png | Bin 0 -> 16712 bytes doc/source/_templates/layout.html | 15 + doc/source/conf.py | 130 + doc/source/cookbook/custom_data_types.rst | 99 + doc/source/cookbook/hints_for_sql_users.rst | 723 ++ doc/source/cookbook/index.rst | 19 + doc/source/cookbook/inmemory_hdf5_files.rst | 139 + doc/source/cookbook/no_root_install.rst | 172 + doc/source/cookbook/py2exe_howto.rst | 82 + .../cookbook/py2exe_howto/pytables_test.py | 44 + doc/source/cookbook/simple_table.rst | 131 + .../cookbook/tailoring_atexit_hooks.rst | 59 + doc/source/cookbook/threading.rst | 272 + doc/source/dev_team.rst | 25 + doc/source/development.rst | 44 + doc/source/downloads.rst | 53 + doc/source/images/NumFocusSponsoredStamp.png | Bin 0 -> 13662 bytes doc/source/images/favicon.ico | Bin 0 -> 615 bytes doc/source/index.rst | 68 + doc/source/other_material.rst | 88 + doc/source/project_pointers.rst | 21 + .../release-notes/RELEASE_NOTES_v0.7.1.rst | 33 + .../release-notes/RELEASE_NOTES_v0.7.2.rst | 34 + .../release-notes/RELEASE_NOTES_v0.8.rst | 150 + .../release-notes/RELEASE_NOTES_v0.9.1.rst | 68 + .../release-notes/RELEASE_NOTES_v0.9.rst | 119 + .../release-notes/RELEASE_NOTES_v1.0.rst | 218 + .../release-notes/RELEASE_NOTES_v1.1.1.rst | 58 + .../release-notes/RELEASE_NOTES_v1.1.rst | 58 + .../release-notes/RELEASE_NOTES_v1.2.1.rst | 56 + .../release-notes/RELEASE_NOTES_v1.2.2.rst | 56 + .../release-notes/RELEASE_NOTES_v1.2.3.rst | 56 + .../release-notes/RELEASE_NOTES_v1.2.rst | 105 + .../release-notes/RELEASE_NOTES_v1.3.1.rst | 66 + .../release-notes/RELEASE_NOTES_v1.3.2.rst | 66 + .../release-notes/RELEASE_NOTES_v1.3.3.rst | 57 + .../release-notes/RELEASE_NOTES_v1.3.rst | 66 + .../release-notes/RELEASE_NOTES_v1.4.rst | 71 + .../RELEASE_NOTES_v2.0.x-pro.rst | 495 + .../release-notes/RELEASE_NOTES_v2.0.x.rst | 461 + .../RELEASE_NOTES_v2.1.x-pro.rst | 114 + .../release-notes/RELEASE_NOTES_v2.1.x.rst | 114 + .../RELEASE_NOTES_v2.2.x-pro.rst | 426 + .../release-notes/RELEASE_NOTES_v2.2.x.rst | 412 + .../release-notes/RELEASE_NOTES_v2.3.x.rst | 100 + .../release-notes/RELEASE_NOTES_v2.4.x.rst | 156 + .../release-notes/RELEASE_NOTES_v3.0.x.rst | 303 + .../release-notes/RELEASE_NOTES_v3.1.x.rst | 221 + .../release-notes/RELEASE_NOTES_v3.10.x.rst | 10 + .../release-notes/RELEASE_NOTES_v3.2.x.rst | 279 + .../release-notes/RELEASE_NOTES_v3.3.x.rst | 33 + .../release-notes/RELEASE_NOTES_v3.4.x.rst | 107 + .../release-notes/RELEASE_NOTES_v3.5.x.rst | 50 + 
.../release-notes/RELEASE_NOTES_v3.6.x.rst | 41 + .../release-notes/RELEASE_NOTES_v3.7.x.rst | 52 + .../release-notes/RELEASE_NOTES_v3.8.x.rst | 30 + .../release-notes/RELEASE_NOTES_v3.9.x.rst | 186 + doc/source/release_notes.rst | 153 + doc/source/usersguide/bibliography.rst | 163 + doc/source/usersguide/condition_syntax.rst | 137 + doc/source/usersguide/datatypes.rst | 117 + doc/source/usersguide/file_format.rst | 350 + doc/source/usersguide/filenode.rst | 291 + doc/source/usersguide/images/Q7-10m-noidx.png | Bin 0 -> 81873 bytes .../usersguide/images/Q8-1g-idx-SSD.png | Bin 0 -> 91417 bytes .../usersguide/images/Q8-1g-idx-compress.png | Bin 0 -> 95434 bytes .../usersguide/images/Q8-1g-idx-optlevels.png | Bin 0 -> 111142 bytes .../usersguide/images/Q8-1g-idx-sorted.png | Bin 0 -> 102858 bytes doc/source/usersguide/images/Q8-1g-noidx.png | Bin 0 -> 83169 bytes .../usersguide/images/b2nd_getslice_big.png | Bin 0 -> 68902 bytes .../usersguide/images/b2nd_getslice_small.png | Bin 0 -> 71351 bytes .../images/compressed-recordsize-shuffle.png | Bin 0 -> 55690 bytes .../images/compressed-recordsize-zlib.png | Bin 0 -> 40779 bytes .../images/compressed-recordsize.png | Bin 0 -> 42077 bytes .../compressed-select-cache-shuffle.png | Bin 0 -> 58355 bytes .../images/compressed-select-cache-zlib.png | Bin 0 -> 43630 bytes .../images/compressed-select-cache.png | Bin 0 -> 44305 bytes ...compressed-select-nocache-shuffle-only.png | Bin 0 -> 54301 bytes .../images/compressed-select-nocache.png | Bin 0 -> 60076 bytes .../images/compressed-writing-shuffle.png | Bin 0 -> 57395 bytes .../images/compressed-writing-zlib.png | Bin 0 -> 41596 bytes .../usersguide/images/compressed-writing.png | Bin 0 -> 43090 bytes .../images/cratio-zlib-blosc-blosc2.png | Bin 0 -> 25862 bytes .../images/create-chunksize-15GB.png | Bin 0 -> 69380 bytes .../create-index-time-int32-float64.png | Bin 0 -> 61390 bytes .../images/direct-chunking-AMD-7800X3D.png | Bin 0 -> 17639 bytes .../images/direct-chunking-i13900K.png | Bin 0 -> 17680 bytes .../images/filesizes-chunksize-15GB.png | Bin 0 -> 57535 bytes .../usersguide/images/indexes-sizes2.png | Bin 0 -> 61049 bytes .../usersguide/images/inkernel-vs-pandas.png | Bin 0 -> 26400 bytes .../images/inkernel-zlib-blosc-blosc2.png | Bin 0 -> 60556 bytes .../usersguide/images/objecttree-h5.png | Bin 0 -> 84624 bytes doc/source/usersguide/images/objecttree.pdf | Bin 0 -> 22134 bytes doc/source/usersguide/images/objecttree.png | Bin 0 -> 236392 bytes doc/source/usersguide/images/objecttree.svg | 828 ++ .../usersguide/images/pytables-front-logo.pdf | Bin 0 -> 76924 bytes .../images/random-chunksize-15GB.png | Bin 0 -> 62011 bytes .../read-medium-psyco-nopsyco-comparison.png | Bin 0 -> 65349 bytes .../usersguide/images/seq-chunksize-15GB.png | Bin 0 -> 70397 bytes .../images/tutorial1-1-tableview.png | Bin 0 -> 89678 bytes .../images/tutorial1-2-tableview.png | Bin 0 -> 88101 bytes .../usersguide/images/tutorial1-general.png | Bin 0 -> 48720 bytes .../usersguide/images/tutorial2-tableview.png | Bin 0 -> 109956 bytes .../write-medium-psyco-nopsyco-comparison.png | Bin 0 -> 60237 bytes doc/source/usersguide/index.rst | 68 + doc/source/usersguide/installation.rst | 586 + doc/source/usersguide/introduction.rst | 318 + doc/source/usersguide/libref.rst | 36 + .../usersguide/libref/declarative_classes.rst | 284 + doc/source/usersguide/libref/expr_class.rst | 37 + doc/source/usersguide/libref/file_class.rst | 137 + .../usersguide/libref/filenode_classes.rst | 150 + 
.../usersguide/libref/helper_classes.rst | 167 + .../usersguide/libref/hierarchy_classes.rst | 248 + .../usersguide/libref/homogenous_storage.rst | 125 + doc/source/usersguide/libref/link_classes.rst | 73 + .../usersguide/libref/structured_storage.rst | 261 + doc/source/usersguide/libref/top_level.rst | 38 + doc/source/usersguide/optimization.rst | 1389 +++ doc/source/usersguide/parameter_files.rst | 159 + doc/source/usersguide/tutorials.rst | 2332 ++++ doc/source/usersguide/usersguide.rst | 116 + doc/source/usersguide/utilities.rst | 499 + examples/add-column.py | 77 + examples/array1.py | 51 + examples/array2.py | 43 + examples/array3.py | 47 + examples/array4.py | 49 + examples/attributes1.py | 33 + examples/attrs-with-padding.py | 53 + examples/carray1.py | 20 + examples/check_examples.sh | 58 + examples/direct-chunk-shape.py | 121 + examples/direct-chunking.py | 108 + examples/earray1.py | 16 + examples/earray2.py | 91 + examples/filenodes1.py | 53 + examples/index.py | 43 + examples/inmemory.py | 56 + examples/links.py | 50 + examples/multiprocess_access_benchmarks.py | 251 + examples/multiprocess_access_queues.py | 195 + examples/nested-tut.py | 142 + examples/nested1.py | 85 + examples/objecttree.py | 52 + examples/particles.py | 129 + examples/play-with-enums.py | 104 + examples/read_array_out_arg.py | 57 + examples/simple_threading.py | 102 + examples/split.py | 39 + examples/table-tree.py | 302 + examples/table1.py | 76 + examples/table2.py | 42 + examples/table3.py | 66 + examples/tables-with-padding.py | 67 + examples/threading_monkeypatch.py | 137 + examples/tutorial1-1.py | 119 + examples/tutorial1-2.py | 305 + examples/tutorial2.py | 114 + examples/tutorial3-1.py | 49 + examples/tutorial3-2.py | 78 + examples/undo-redo.py | 142 + examples/vlarray1.py | 44 + examples/vlarray2.py | 109 + examples/vlarray3.py | 28 + examples/vlarray4.py | 28 + hdf5-blosc/src/blosc_filter.c | 272 + hdf5-blosc/src/blosc_filter.h | 27 + hdf5-blosc2/src/blosc2_filter.c | 602 ++ hdf5-blosc2/src/blosc2_filter.h | 47 + pyproject.toml | 207 + requirements-docs.txt | 312 + requirements.txt | 289 + setup.cfg | 4 + setup.py | 1281 +++ src/H5ARRAY-opt.c | 955 ++ src/H5ARRAY-opt.h | 73 + src/H5ARRAY.c | 909 ++ src/H5ARRAY.h | 95 + src/H5ATTR.c | 619 ++ src/H5ATTR.h | 86 + src/H5TB-opt.c | 1142 ++ src/H5TB-opt.h | 110 + src/H5VLARRAY.c | 413 + src/H5VLARRAY.h | 47 + src/H5Zbzip2.c | 194 + src/H5Zbzip2.h | 7 + src/H5Zlzo.c | 305 + src/H5Zlzo.h | 7 + src/idx-opt.c | 392 + src/idx-opt.h | 46 + src/tables.h | 8 + src/typeconv.c | 93 + src/typeconv.h | 37 + src/utils.c | 934 ++ src/utils.h | 123 + tables.egg-info/PKG-INFO | 47 + tables.egg-info/SOURCES.txt | 629 ++ tables.egg-info/dependency_links.txt | 1 + tables.egg-info/entry_points.txt | 5 + tables.egg-info/not-zip-safe | 1 + tables.egg-info/requires.txt | 6 + tables.egg-info/top_level.txt | 1 + tables/__init__.py | 245 + tables/_comp_bzip2.pyx | 19 + tables/_comp_lzo.pyx | 19 + tables/_version.py | 2 + tables/array.py | 993 ++ tables/atom.py | 1374 +++ tables/attributeset.py | 740 ++ tables/carray.py | 316 + tables/conditions.py | 530 + tables/definitions.pxd | 580 + tables/description.py | 1047 ++ tables/earray.py | 293 + tables/exceptions.py | 467 + tables/expression.py | 785 ++ tables/file.py | 3037 ++++++ tables/filters.py | 474 + tables/flavor.py | 456 + tables/group.py | 1314 +++ tables/hdf5extension.pxd | 43 + tables/hdf5extension.pyx | 2483 +++++ tables/idxutils.py | 505 + tables/index.py | 2551 +++++ tables/indexes.py | 190 + 
tables/indexesextension.pyx | 1545 +++ tables/leaf.py | 1063 ++ tables/link.py | 464 + tables/linkextension.pyx | 283 + tables/lrucacheextension.pxd | 83 + tables/lrucacheextension.pyx | 641 ++ tables/misc/__init__.py | 6 + tables/misc/enum.py | 437 + tables/misc/proxydict.py | 63 + tables/node.py | 946 ++ tables/nodes/__init__.py | 14 + tables/nodes/filenode.py | 852 ++ tables/nodes/tests/__init__.py | 1 + tables/nodes/tests/test_filenode.dat | 46 + tables/nodes/tests/test_filenode.py | 1147 ++ tables/nodes/tests/test_filenode.xbm | 46 + tables/nodes/tests/test_filenode_v1.h5 | Bin 0 -> 9062 bytes tables/parameters.py | 442 + tables/path.py | 227 + tables/registry.py | 75 + tables/req_versions.py | 16 + tables/scripts/__init__.py | 6 + tables/scripts/pt2to3.py | 541 + tables/scripts/ptdump.py | 197 + tables/scripts/ptrepack.py | 775 ++ tables/scripts/pttree.py | 505 + tables/table.py | 4114 +++++++ tables/tableextension.pyx | 1793 ++++ tables/tests/Table2_1_lzo_nrv2e_shuffle.h5 | Bin 0 -> 19206 bytes tables/tests/Tables_lzo1.h5 | Bin 0 -> 23363 bytes tables/tests/Tables_lzo1_shuffle.h5 | Bin 0 -> 21097 bytes tables/tests/Tables_lzo2.h5 | Bin 0 -> 23398 bytes tables/tests/Tables_lzo2_shuffle.h5 | Bin 0 -> 21097 bytes tables/tests/__init__.py | 11 + tables/tests/array_mdatom.h5 | Bin 0 -> 5150 bytes tables/tests/attr-u16.h5 | Bin 0 -> 28782 bytes tables/tests/b2nd-no-chunkshape.h5 | Bin 0 -> 5168 bytes tables/tests/blosc_bigendian.h5 | Bin 0 -> 11974 bytes tables/tests/bug-idx.h5 | Bin 0 -> 14649 bytes tables/tests/check_leaks.py | 383 + tables/tests/common.py | 395 + tables/tests/create_backcompat_indexes.py | 41 + tables/tests/elink.h5 | Bin 0 -> 3550 bytes tables/tests/elink2.h5 | Bin 0 -> 2238 bytes tables/tests/ex-noattr.h5 | Bin 0 -> 12342 bytes tables/tests/flavored_vlarrays-format1.6.h5 | Bin 0 -> 12621 bytes tables/tests/float.h5 | Bin 0 -> 4742 bytes tables/tests/idx-std-1.x.h5 | Bin 0 -> 26662 bytes tables/tests/indexes_2_0.h5 | Bin 0 -> 60801 bytes tables/tests/indexes_2_1.h5 | Bin 0 -> 147256 bytes tables/tests/issue_368.h5 | Bin 0 -> 1232 bytes tables/tests/issue_560.h5 | Bin 0 -> 2344 bytes tables/tests/itemsize.h5 | Bin 0 -> 2096 bytes tables/tests/matlab_file.mat | Bin 0 -> 1942 bytes tables/tests/nested-type-with-gaps.h5 | Bin 0 -> 1830 bytes tables/tests/non-chunked-table.h5 | Bin 0 -> 6184 bytes tables/tests/oldflavor_numeric.h5 | Bin 0 -> 112296 bytes tables/tests/out_of_order_types.h5 | Bin 0 -> 71001 bytes tables/tests/python2.h5 | Bin 0 -> 79658 bytes tables/tests/python3.h5 | Bin 0 -> 79658 bytes tables/tests/scalar.h5 | Bin 0 -> 8294 bytes tables/tests/slink.h5 | Bin 0 -> 5502 bytes tables/tests/smpl_SDSextendible.h5 | Bin 0 -> 6246 bytes tables/tests/smpl_compound_chunked.h5 | Bin 0 -> 5774 bytes tables/tests/smpl_enum.h5 | Bin 0 -> 2094 bytes tables/tests/smpl_f64be.h5 | Bin 0 -> 2294 bytes tables/tests/smpl_f64le.h5 | Bin 0 -> 2294 bytes tables/tests/smpl_i32be.h5 | Bin 0 -> 2174 bytes tables/tests/smpl_i32le.h5 | Bin 0 -> 2174 bytes tables/tests/smpl_i64be.h5 | Bin 0 -> 2294 bytes tables/tests/smpl_i64le.h5 | Bin 0 -> 2294 bytes tables/tests/smpl_unsupptype.h5 | Bin 0 -> 11870 bytes tables/tests/test_all.py | 59 + tables/tests/test_array.py | 2903 +++++ tables/tests/test_attributes.py | 2049 ++++ tables/tests/test_aux.py | 35 + tables/tests/test_backcompat.py | 228 + tables/tests/test_basics.py | 2675 +++++ tables/tests/test_carray.py | 3266 ++++++ tables/tests/test_create.py | 2839 +++++ tables/tests/test_direct_chunk.py | 418 + tables/tests/test_do_undo.py | 
2790 +++++ tables/tests/test_earray.py | 3240 ++++++ tables/tests/test_enum.py | 745 ++ tables/tests/test_expression.py | 1742 +++ tables/tests/test_garbage.py | 53 + tables/tests/test_hdf5compat.py | 445 + tables/tests/test_index_backcompat.py | 166 + tables/tests/test_indexes.py | 2860 +++++ tables/tests/test_indexvalues.py | 3573 +++++++ tables/tests/test_large_tables.py | 110 + tables/tests/test_links.py | 654 ++ tables/tests/test_lists.py | 475 + tables/tests/test_nestedtypes.py | 1706 +++ tables/tests/test_numpy.py | 1444 +++ tables/tests/test_queries.py | 1379 +++ tables/tests/test_ref_array1.mat | Bin 0 -> 16192 bytes tables/tests/test_ref_array2.mat | Bin 0 -> 4832 bytes tables/tests/test_suite.py | 98 + tables/tests/test_szip.h5 | Bin 0 -> 5594 bytes tables/tests/test_tables.py | 8066 ++++++++++++++ tables/tests/test_tablesMD.py | 2510 +++++ tables/tests/test_timestamps.py | 189 + tables/tests/test_timetype.py | 565 + tables/tests/test_tree.py | 1241 +++ tables/tests/test_types.py | 335 + tables/tests/test_utils.py | 93 + tables/tests/test_vlarray.py | 4799 +++++++++ tables/tests/time-table-vlarray-1_x.h5 | Bin 0 -> 3766 bytes tables/tests/times-nested-be.h5 | Bin 0 -> 22674 bytes tables/tests/vlstr_attr.h5 | Bin 0 -> 5294 bytes tables/tests/vlunicode_endian.h5 | Bin 0 -> 82022 bytes tables/tests/zerodim-attrs-1.3.h5 | Bin 0 -> 5102 bytes tables/tests/zerodim-attrs-1.4.h5 | Bin 0 -> 4366 bytes tables/undoredo.py | 181 + tables/unimplemented.py | 175 + tables/utils.py | 471 + tables/utilsextension.pxd | 24 + tables/utilsextension.pyx | 1668 +++ tables/vlarray.py | 916 ++ utils/pt2to3 | 4 + utils/ptdump | 4 + utils/ptrepack | 4 + utils/pttree | 4 + 631 files changed, 270950 insertions(+) create mode 100644 ANNOUNCE.txt create mode 100644 LICENSE.txt create mode 100644 LICENSES/BLOSC.txt create mode 100644 LICENSES/CLOUD-SPTHEME.txt create mode 100644 LICENSES/FASTLZ.txt create mode 100644 LICENSES/H5PY.txt create mode 100644 LICENSES/HDF5.txt create mode 100644 LICENSES/LZ4.txt create mode 100644 LICENSES/SNAPPY.txt create mode 100644 LICENSES/STDINT.txt create mode 100644 LICENSES/WIN32PTHREADS.txt create mode 100644 LICENSES/ZLIB.txt create mode 100644 LICENSES/ZSTD.TXT create mode 100644 MANIFEST.in create mode 100644 Makefile create mode 100644 PKG-INFO create mode 100644 README.rst create mode 100644 THANKS create mode 100644 bench/100-trillion-baby.py create mode 100644 bench/LRU-experiments.py create mode 100644 bench/LRU-experiments2.py create mode 100644 bench/LRUcache-node-bench.py create mode 100644 bench/b2nd_compare_getslice.py create mode 100644 bench/b2nd_compare_getslice_plot.py create mode 100755 bench/bench-postgres-ranges.sh create mode 100755 bench/bench-pytables-ranges.sh create mode 100755 bench/bench-pytables.sh create mode 100644 bench/blosc.py create mode 100644 bench/bsddb-table-bench.py create mode 100644 bench/cacheout.py create mode 100644 bench/chunkshape-bench.py create mode 100644 bench/chunkshape-testing.py create mode 100644 bench/collations.py create mode 100644 bench/copy-bench.py create mode 100644 bench/create-large-number-objects.py create mode 100644 bench/deep-tree-h5py.py create mode 100644 bench/deep-tree.py create mode 100644 bench/evaluate.py create mode 100644 bench/expression.py create mode 100644 bench/fetch_meteo_data.py create mode 100644 bench/get-figures-ranges.py create mode 100644 bench/get-figures.py create mode 100644 bench/indexed_search.py create mode 100644 bench/keysort.py create mode 100644 bench/lookup_bench.py create 
mode 100644 bench/open_close-bench.py create mode 100644 bench/opteron-stress-test.txt create mode 100644 bench/optimal-chunksize.py create mode 100644 bench/plot-bar.py create mode 100644 bench/plot-comparison-lzo-zlib-ucl.gnuplot create mode 100644 bench/plot-comparison-psyco-lzo.gnuplot create mode 100644 bench/poly.py create mode 100644 bench/postgres-search-bench.py create mode 100644 bench/postgres_backend.py create mode 100644 bench/pytables-search-bench.py create mode 100644 bench/pytables_backend.py create mode 100644 bench/query_meteo_data.py create mode 100644 bench/read-table.py create mode 100644 bench/recarray2-test.py create mode 100644 bench/search-bench-plot.py create mode 100755 bench/search-bench-rnd.sh create mode 100644 bench/search-bench.py create mode 100755 bench/search-bench.sh create mode 100644 bench/searchsorted-bench.py create mode 100644 bench/searchsorted-bench2.py create mode 100644 bench/shelve-bench.py create mode 100644 bench/simple-table.py create mode 100644 bench/split-file.py create mode 100755 bench/sqlite-search-bench-rnd.sh create mode 100644 bench/sqlite-search-bench.py create mode 100755 bench/sqlite-search-bench.sh create mode 100644 bench/sqlite3-search-bench.py create mode 100644 bench/stress-test.py create mode 100644 bench/stress-test2.py create mode 100644 bench/stress-test3.py create mode 100644 bench/table-bench.py create mode 100644 bench/table-copy.py create mode 100644 bench/undo_redo.py create mode 100644 bench/undo_redo.txt create mode 100644 bench/widetree.py create mode 100644 bench/widetree2.py create mode 100644 bench/woody-pentiumIV.txt create mode 100644 c-blosc/blosc/bitshuffle-avx2.c create mode 100644 c-blosc/blosc/bitshuffle-avx2.h create mode 100644 c-blosc/blosc/bitshuffle-generic.c create mode 100644 c-blosc/blosc/bitshuffle-generic.h create mode 100644 c-blosc/blosc/bitshuffle-sse2.c create mode 100644 c-blosc/blosc/bitshuffle-sse2.h create mode 100644 c-blosc/blosc/blosc-common.h create mode 100644 c-blosc/blosc/blosc-comp-features.h create mode 100644 c-blosc/blosc/blosc-export.h create mode 100644 c-blosc/blosc/blosc.c create mode 100644 c-blosc/blosc/blosc.h create mode 100644 c-blosc/blosc/blosclz.c create mode 100644 c-blosc/blosc/blosclz.h create mode 100644 c-blosc/blosc/fastcopy.c create mode 100644 c-blosc/blosc/fastcopy.h create mode 100644 c-blosc/blosc/shuffle-avx2.c create mode 100644 c-blosc/blosc/shuffle-avx2.h create mode 100644 c-blosc/blosc/shuffle-generic.c create mode 100644 c-blosc/blosc/shuffle-generic.h create mode 100644 c-blosc/blosc/shuffle-sse2.c create mode 100644 c-blosc/blosc/shuffle-sse2.h create mode 100644 c-blosc/blosc/shuffle.c create mode 100644 c-blosc/blosc/shuffle.h create mode 100644 c-blosc/blosc/win32/pthread.c create mode 100644 c-blosc/blosc/win32/pthread.h create mode 100644 c-blosc/blosc/win32/stdint-windows.h create mode 100644 c-blosc/internal-complibs/lz4-1.9.4/lz4.c create mode 100644 c-blosc/internal-complibs/lz4-1.9.4/lz4.h create mode 100644 c-blosc/internal-complibs/lz4-1.9.4/lz4hc.c create mode 100644 c-blosc/internal-complibs/lz4-1.9.4/lz4hc.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/adler32.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/compress.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.c create mode 100644 
c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inffix9.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inflate9.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream/zfstream.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream2/zstream.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/test.cc create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.cc create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/crypt.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/miniunz.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/minizip.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/pufftest.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/testzlib/testzlib.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/contrib/untgz/untgz.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/crc32.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/crc32.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/deflate.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/deflate.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/enough.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/fitblk.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/gun.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/gzappend.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/gzjoin.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/gznorm.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/zpipe.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/zran.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/examples/zran.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/gzclose.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/gzguts.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/gzlib.c create mode 100644 
c-blosc/internal-complibs/zlib-1.3.1/gzread.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/gzwrite.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/infback.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/inffast.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/inffast.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/inffixed.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/inflate.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/inflate.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/inftrees.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/inftrees.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/test/example.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/test/infcover.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/test/minigzip.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/trees.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/trees.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/uncompr.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/zconf.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/zlib.h create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/zutil.c create mode 100644 c-blosc/internal-complibs/zlib-1.3.1/zutil.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/allocations.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/bits.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/bitstream.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/compiler.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/cpu.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/debug.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/debug.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/entropy_common.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/error_private.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/error_private.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/fse.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/fse_decompress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/huf.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/mem.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/pool.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/pool.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/portability_macros.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/threading.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/threading.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/zstd_common.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/zstd_deps.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/zstd_internal.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/common/zstd_trace.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/clevels.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/fse_compress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/hist.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/hist.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/huf_compress.c create mode 
100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_internal.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_cwksp.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm_geartab.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/decompress/huf_decompress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_internal.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_common.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_compress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_decompress.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/fastcover.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/zdict.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_legacy.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v02.c create mode 100644 
c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v02.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v03.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v03.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v04.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v04.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v05.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v05.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v06.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v06.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v07.c create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v07.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/zdict.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/zstd.h create mode 100644 c-blosc/internal-complibs/zstd-1.5.6/zstd_errors.h create mode 100644 contrib/README create mode 100644 contrib/make_hdf.py create mode 100755 contrib/nctoh5.py create mode 100644 doc/Makefile create mode 100644 doc/make.bat create mode 100644 doc/scripts/filenode.py create mode 100644 doc/scripts/pickletrouble.py create mode 100644 doc/scripts/tutorial1.py create mode 100644 doc/source/FAQ.rst create mode 100644 doc/source/MIGRATING_TO_2.x.rst create mode 100644 doc/source/MIGRATING_TO_3.x.rst create mode 100644 doc/source/_static/logo-pytables-small.png create mode 100644 doc/source/_templates/layout.html create mode 100644 doc/source/conf.py create mode 100644 doc/source/cookbook/custom_data_types.rst create mode 100644 doc/source/cookbook/hints_for_sql_users.rst create mode 100644 doc/source/cookbook/index.rst create mode 100644 doc/source/cookbook/inmemory_hdf5_files.rst create mode 100644 doc/source/cookbook/no_root_install.rst create mode 100644 doc/source/cookbook/py2exe_howto.rst create mode 100644 doc/source/cookbook/py2exe_howto/pytables_test.py create mode 100644 doc/source/cookbook/simple_table.rst create mode 100644 doc/source/cookbook/tailoring_atexit_hooks.rst create mode 100644 doc/source/cookbook/threading.rst create mode 100644 doc/source/dev_team.rst create mode 100644 doc/source/development.rst create mode 100644 doc/source/downloads.rst create mode 100644 doc/source/images/NumFocusSponsoredStamp.png create mode 100644 doc/source/images/favicon.ico create mode 100644 doc/source/index.rst create mode 100644 doc/source/other_material.rst create mode 100644 doc/source/project_pointers.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.7.1.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.7.2.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.8.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.9.1.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v0.9.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.0.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.1.1.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.1.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.1.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.2.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.3.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.2.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.3.1.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.3.2.rst create mode 100644 
doc/source/release-notes/RELEASE_NOTES_v1.3.3.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.3.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v1.4.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.0.x-pro.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.0.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.1.x-pro.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.1.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.2.x-pro.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.2.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.3.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v2.4.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.0.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.1.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.10.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.2.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.3.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.4.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.5.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.6.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.7.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.8.x.rst create mode 100644 doc/source/release-notes/RELEASE_NOTES_v3.9.x.rst create mode 100644 doc/source/release_notes.rst create mode 100644 doc/source/usersguide/bibliography.rst create mode 100644 doc/source/usersguide/condition_syntax.rst create mode 100644 doc/source/usersguide/datatypes.rst create mode 100644 doc/source/usersguide/file_format.rst create mode 100644 doc/source/usersguide/filenode.rst create mode 100644 doc/source/usersguide/images/Q7-10m-noidx.png create mode 100644 doc/source/usersguide/images/Q8-1g-idx-SSD.png create mode 100644 doc/source/usersguide/images/Q8-1g-idx-compress.png create mode 100644 doc/source/usersguide/images/Q8-1g-idx-optlevels.png create mode 100644 doc/source/usersguide/images/Q8-1g-idx-sorted.png create mode 100644 doc/source/usersguide/images/Q8-1g-noidx.png create mode 100644 doc/source/usersguide/images/b2nd_getslice_big.png create mode 100644 doc/source/usersguide/images/b2nd_getslice_small.png create mode 100644 doc/source/usersguide/images/compressed-recordsize-shuffle.png create mode 100644 doc/source/usersguide/images/compressed-recordsize-zlib.png create mode 100644 doc/source/usersguide/images/compressed-recordsize.png create mode 100644 doc/source/usersguide/images/compressed-select-cache-shuffle.png create mode 100644 doc/source/usersguide/images/compressed-select-cache-zlib.png create mode 100644 doc/source/usersguide/images/compressed-select-cache.png create mode 100644 doc/source/usersguide/images/compressed-select-nocache-shuffle-only.png create mode 100644 doc/source/usersguide/images/compressed-select-nocache.png create mode 100644 doc/source/usersguide/images/compressed-writing-shuffle.png create mode 100644 doc/source/usersguide/images/compressed-writing-zlib.png create mode 100644 doc/source/usersguide/images/compressed-writing.png create mode 100644 doc/source/usersguide/images/cratio-zlib-blosc-blosc2.png create mode 100644 doc/source/usersguide/images/create-chunksize-15GB.png create mode 100644 doc/source/usersguide/images/create-index-time-int32-float64.png create mode 100644 
doc/source/usersguide/images/direct-chunking-AMD-7800X3D.png create mode 100644 doc/source/usersguide/images/direct-chunking-i13900K.png create mode 100644 doc/source/usersguide/images/filesizes-chunksize-15GB.png create mode 100644 doc/source/usersguide/images/indexes-sizes2.png create mode 100644 doc/source/usersguide/images/inkernel-vs-pandas.png create mode 100644 doc/source/usersguide/images/inkernel-zlib-blosc-blosc2.png create mode 100644 doc/source/usersguide/images/objecttree-h5.png create mode 100644 doc/source/usersguide/images/objecttree.pdf create mode 100644 doc/source/usersguide/images/objecttree.png create mode 100644 doc/source/usersguide/images/objecttree.svg create mode 100644 doc/source/usersguide/images/pytables-front-logo.pdf create mode 100644 doc/source/usersguide/images/random-chunksize-15GB.png create mode 100644 doc/source/usersguide/images/read-medium-psyco-nopsyco-comparison.png create mode 100644 doc/source/usersguide/images/seq-chunksize-15GB.png create mode 100644 doc/source/usersguide/images/tutorial1-1-tableview.png create mode 100644 doc/source/usersguide/images/tutorial1-2-tableview.png create mode 100644 doc/source/usersguide/images/tutorial1-general.png create mode 100644 doc/source/usersguide/images/tutorial2-tableview.png create mode 100644 doc/source/usersguide/images/write-medium-psyco-nopsyco-comparison.png create mode 100644 doc/source/usersguide/index.rst create mode 100644 doc/source/usersguide/installation.rst create mode 100644 doc/source/usersguide/introduction.rst create mode 100644 doc/source/usersguide/libref.rst create mode 100644 doc/source/usersguide/libref/declarative_classes.rst create mode 100644 doc/source/usersguide/libref/expr_class.rst create mode 100644 doc/source/usersguide/libref/file_class.rst create mode 100644 doc/source/usersguide/libref/filenode_classes.rst create mode 100644 doc/source/usersguide/libref/helper_classes.rst create mode 100644 doc/source/usersguide/libref/hierarchy_classes.rst create mode 100644 doc/source/usersguide/libref/homogenous_storage.rst create mode 100644 doc/source/usersguide/libref/link_classes.rst create mode 100644 doc/source/usersguide/libref/structured_storage.rst create mode 100644 doc/source/usersguide/libref/top_level.rst create mode 100644 doc/source/usersguide/optimization.rst create mode 100644 doc/source/usersguide/parameter_files.rst create mode 100644 doc/source/usersguide/tutorials.rst create mode 100644 doc/source/usersguide/usersguide.rst create mode 100644 doc/source/usersguide/utilities.rst create mode 100644 examples/add-column.py create mode 100644 examples/array1.py create mode 100644 examples/array2.py create mode 100644 examples/array3.py create mode 100644 examples/array4.py create mode 100644 examples/attributes1.py create mode 100644 examples/attrs-with-padding.py create mode 100644 examples/carray1.py create mode 100755 examples/check_examples.sh create mode 100644 examples/direct-chunk-shape.py create mode 100644 examples/direct-chunking.py create mode 100644 examples/earray1.py create mode 100644 examples/earray2.py create mode 100644 examples/filenodes1.py create mode 100644 examples/index.py create mode 100644 examples/inmemory.py create mode 100644 examples/links.py create mode 100644 examples/multiprocess_access_benchmarks.py create mode 100644 examples/multiprocess_access_queues.py create mode 100644 examples/nested-tut.py create mode 100644 examples/nested1.py create mode 100644 examples/objecttree.py create mode 100644 examples/particles.py create mode 100644 
examples/play-with-enums.py create mode 100644 examples/read_array_out_arg.py create mode 100644 examples/simple_threading.py create mode 100644 examples/split.py create mode 100644 examples/table-tree.py create mode 100644 examples/table1.py create mode 100644 examples/table2.py create mode 100644 examples/table3.py create mode 100644 examples/tables-with-padding.py create mode 100644 examples/threading_monkeypatch.py create mode 100644 examples/tutorial1-1.py create mode 100644 examples/tutorial1-2.py create mode 100644 examples/tutorial2.py create mode 100644 examples/tutorial3-1.py create mode 100644 examples/tutorial3-2.py create mode 100644 examples/undo-redo.py create mode 100644 examples/vlarray1.py create mode 100644 examples/vlarray2.py create mode 100644 examples/vlarray3.py create mode 100644 examples/vlarray4.py create mode 100644 hdf5-blosc/src/blosc_filter.c create mode 100644 hdf5-blosc/src/blosc_filter.h create mode 100644 hdf5-blosc2/src/blosc2_filter.c create mode 100644 hdf5-blosc2/src/blosc2_filter.h create mode 100644 pyproject.toml create mode 100644 requirements-docs.txt create mode 100644 requirements.txt create mode 100644 setup.cfg create mode 100755 setup.py create mode 100644 src/H5ARRAY-opt.c create mode 100644 src/H5ARRAY-opt.h create mode 100644 src/H5ARRAY.c create mode 100644 src/H5ARRAY.h create mode 100644 src/H5ATTR.c create mode 100644 src/H5ATTR.h create mode 100644 src/H5TB-opt.c create mode 100644 src/H5TB-opt.h create mode 100644 src/H5VLARRAY.c create mode 100644 src/H5VLARRAY.h create mode 100644 src/H5Zbzip2.c create mode 100644 src/H5Zbzip2.h create mode 100644 src/H5Zlzo.c create mode 100644 src/H5Zlzo.h create mode 100644 src/idx-opt.c create mode 100644 src/idx-opt.h create mode 100644 src/tables.h create mode 100644 src/typeconv.c create mode 100644 src/typeconv.h create mode 100644 src/utils.c create mode 100644 src/utils.h create mode 100644 tables.egg-info/PKG-INFO create mode 100644 tables.egg-info/SOURCES.txt create mode 100644 tables.egg-info/dependency_links.txt create mode 100644 tables.egg-info/entry_points.txt create mode 100644 tables.egg-info/not-zip-safe create mode 100644 tables.egg-info/requires.txt create mode 100644 tables.egg-info/top_level.txt create mode 100644 tables/__init__.py create mode 100644 tables/_comp_bzip2.pyx create mode 100644 tables/_comp_lzo.pyx create mode 100644 tables/_version.py create mode 100644 tables/array.py create mode 100644 tables/atom.py create mode 100644 tables/attributeset.py create mode 100644 tables/carray.py create mode 100644 tables/conditions.py create mode 100644 tables/definitions.pxd create mode 100644 tables/description.py create mode 100644 tables/earray.py create mode 100644 tables/exceptions.py create mode 100644 tables/expression.py create mode 100644 tables/file.py create mode 100644 tables/filters.py create mode 100644 tables/flavor.py create mode 100644 tables/group.py create mode 100644 tables/hdf5extension.pxd create mode 100644 tables/hdf5extension.pyx create mode 100644 tables/idxutils.py create mode 100644 tables/index.py create mode 100644 tables/indexes.py create mode 100644 tables/indexesextension.pyx create mode 100644 tables/leaf.py create mode 100644 tables/link.py create mode 100644 tables/linkextension.pyx create mode 100644 tables/lrucacheextension.pxd create mode 100644 tables/lrucacheextension.pyx create mode 100644 tables/misc/__init__.py create mode 100644 tables/misc/enum.py create mode 100644 tables/misc/proxydict.py create mode 100644 tables/node.py 
create mode 100644 tables/nodes/__init__.py create mode 100644 tables/nodes/filenode.py create mode 100644 tables/nodes/tests/__init__.py create mode 100644 tables/nodes/tests/test_filenode.dat create mode 100644 tables/nodes/tests/test_filenode.py create mode 100644 tables/nodes/tests/test_filenode.xbm create mode 100644 tables/nodes/tests/test_filenode_v1.h5 create mode 100644 tables/parameters.py create mode 100644 tables/path.py create mode 100644 tables/registry.py create mode 100644 tables/req_versions.py create mode 100644 tables/scripts/__init__.py create mode 100644 tables/scripts/pt2to3.py create mode 100644 tables/scripts/ptdump.py create mode 100644 tables/scripts/ptrepack.py create mode 100644 tables/scripts/pttree.py create mode 100644 tables/table.py create mode 100644 tables/tableextension.pyx create mode 100644 tables/tests/Table2_1_lzo_nrv2e_shuffle.h5 create mode 100644 tables/tests/Tables_lzo1.h5 create mode 100644 tables/tests/Tables_lzo1_shuffle.h5 create mode 100644 tables/tests/Tables_lzo2.h5 create mode 100644 tables/tests/Tables_lzo2_shuffle.h5 create mode 100644 tables/tests/__init__.py create mode 100644 tables/tests/array_mdatom.h5 create mode 100644 tables/tests/attr-u16.h5 create mode 100644 tables/tests/b2nd-no-chunkshape.h5 create mode 100644 tables/tests/blosc_bigendian.h5 create mode 100644 tables/tests/bug-idx.h5 create mode 100644 tables/tests/check_leaks.py create mode 100644 tables/tests/common.py create mode 100644 tables/tests/create_backcompat_indexes.py create mode 100644 tables/tests/elink.h5 create mode 100644 tables/tests/elink2.h5 create mode 100644 tables/tests/ex-noattr.h5 create mode 100644 tables/tests/flavored_vlarrays-format1.6.h5 create mode 100644 tables/tests/float.h5 create mode 100644 tables/tests/idx-std-1.x.h5 create mode 100644 tables/tests/indexes_2_0.h5 create mode 100644 tables/tests/indexes_2_1.h5 create mode 100644 tables/tests/issue_368.h5 create mode 100644 tables/tests/issue_560.h5 create mode 100644 tables/tests/itemsize.h5 create mode 100644 tables/tests/matlab_file.mat create mode 100644 tables/tests/nested-type-with-gaps.h5 create mode 100644 tables/tests/non-chunked-table.h5 create mode 100644 tables/tests/oldflavor_numeric.h5 create mode 100644 tables/tests/out_of_order_types.h5 create mode 100644 tables/tests/python2.h5 create mode 100644 tables/tests/python3.h5 create mode 100644 tables/tests/scalar.h5 create mode 100644 tables/tests/slink.h5 create mode 100644 tables/tests/smpl_SDSextendible.h5 create mode 100644 tables/tests/smpl_compound_chunked.h5 create mode 100644 tables/tests/smpl_enum.h5 create mode 100644 tables/tests/smpl_f64be.h5 create mode 100644 tables/tests/smpl_f64le.h5 create mode 100644 tables/tests/smpl_i32be.h5 create mode 100644 tables/tests/smpl_i32le.h5 create mode 100644 tables/tests/smpl_i64be.h5 create mode 100644 tables/tests/smpl_i64le.h5 create mode 100644 tables/tests/smpl_unsupptype.h5 create mode 100644 tables/tests/test_all.py create mode 100644 tables/tests/test_array.py create mode 100644 tables/tests/test_attributes.py create mode 100644 tables/tests/test_aux.py create mode 100644 tables/tests/test_backcompat.py create mode 100644 tables/tests/test_basics.py create mode 100644 tables/tests/test_carray.py create mode 100644 tables/tests/test_create.py create mode 100644 tables/tests/test_direct_chunk.py create mode 100644 tables/tests/test_do_undo.py create mode 100644 tables/tests/test_earray.py create mode 100644 tables/tests/test_enum.py create mode 100644 
tables/tests/test_expression.py
 create mode 100644 tables/tests/test_garbage.py
 create mode 100644 tables/tests/test_hdf5compat.py
 create mode 100644 tables/tests/test_index_backcompat.py
 create mode 100644 tables/tests/test_indexes.py
 create mode 100644 tables/tests/test_indexvalues.py
 create mode 100644 tables/tests/test_large_tables.py
 create mode 100644 tables/tests/test_links.py
 create mode 100644 tables/tests/test_lists.py
 create mode 100644 tables/tests/test_nestedtypes.py
 create mode 100644 tables/tests/test_numpy.py
 create mode 100644 tables/tests/test_queries.py
 create mode 100644 tables/tests/test_ref_array1.mat
 create mode 100644 tables/tests/test_ref_array2.mat
 create mode 100644 tables/tests/test_suite.py
 create mode 100644 tables/tests/test_szip.h5
 create mode 100644 tables/tests/test_tables.py
 create mode 100644 tables/tests/test_tablesMD.py
 create mode 100644 tables/tests/test_timestamps.py
 create mode 100644 tables/tests/test_timetype.py
 create mode 100644 tables/tests/test_tree.py
 create mode 100644 tables/tests/test_types.py
 create mode 100644 tables/tests/test_utils.py
 create mode 100644 tables/tests/test_vlarray.py
 create mode 100644 tables/tests/time-table-vlarray-1_x.h5
 create mode 100644 tables/tests/times-nested-be.h5
 create mode 100644 tables/tests/vlstr_attr.h5
 create mode 100644 tables/tests/vlunicode_endian.h5
 create mode 100644 tables/tests/zerodim-attrs-1.3.h5
 create mode 100644 tables/tests/zerodim-attrs-1.4.h5
 create mode 100644 tables/undoredo.py
 create mode 100644 tables/unimplemented.py
 create mode 100644 tables/utils.py
 create mode 100644 tables/utilsextension.pxd
 create mode 100644 tables/utilsextension.pyx
 create mode 100644 tables/vlarray.py
 create mode 100755 utils/pt2to3
 create mode 100755 utils/ptdump
 create mode 100755 utils/ptrepack
 create mode 100755 utils/pttree
diff --git a/ANNOUNCE.txt b/ANNOUNCE.txt
new file mode 100644
index 0000000..e448643
--- /dev/null
+++ b/ANNOUNCE.txt
@@ -0,0 +1,79 @@
+===========================
+ Announcing PyTables 3.10.2
+===========================
+
+We are happy to announce PyTables 3.10.2.
+
+
+What's new
+==========
+
+This release of PyTables is mostly intended to make available a
+complete set of Python wheel packages.
+
+The source code of the package has been completely reformatted
+to be fully PEP-8 compliant and in line with the latest PyQA standards.
+Checking of formatting and code quality is now regularly performed in CI.
+
+Finally, some small bug fixes and enhancements have been implemented.
+
+If you want to know in more detail what has changed in this
+version, please refer to: http://www.pytables.org/release_notes.html
+
+You can install it via pip or download a source package with generated
+PDF and HTML docs from:
+https://github.com/PyTables/PyTables/releases/v3.10.2
+
+For an online version of the manual, visit:
+http://www.pytables.org/usersguide/index.html
+
+
+What is it?
+===========
+
+PyTables is a library for managing hierarchical datasets, designed to
+efficiently cope with extremely large amounts of data, with support
+for full 64-bit file addressing. PyTables runs on top of the HDF5
+library and the NumPy package to achieve maximum throughput and
+convenient use. PyTables includes OPSI, a new indexing technology
+that allows performing data lookups in tables exceeding 10 gigarows
+(10**10 rows) in less than a tenth of a second.
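+
+For instance (a minimal sketch; the file and column names below are
+only illustrative), an OPSI index is created through the regular
+column API and is then used transparently by queries::
+
+    import tables as tb
+
+    with tb.open_file("data.h5", "a") as h5:
+        table = h5.root.table
+        table.cols.lat.create_index()  # build an OPSI index on "lat"
+        hits = [row["lat"] for row in table.where("lat > 10")]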
+
+
+Resources
+=========
+
+About PyTables: http://www.pytables.org
+
+About the HDF5 library: http://hdfgroup.org/HDF5/
+
+About NumPy: http://numpy.org/
+
+
+Acknowledgments
+===============
+
+Thanks to the many users who provided feature improvements, patches, bug
+reports, support and suggestions. See the ``THANKS`` file in the
+distribution package for an (incomplete) list of contributors. Most
+especially, a lot of kudos go to the HDF5 and NumPy makers.
+Without them, PyTables simply would not exist.
+
+
+Share your experience
+=====================
+
+Let us know of any bugs, suggestions, gripes, kudos, etc. you may have.
+
+
+----
+
+  **Enjoy data!**
+
+  -- The PyTables Developers
+
+.. Local Variables:
+.. mode: rst
+.. coding: utf-8
+.. fill-column: 72
+.. End:
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..93b8a98
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,35 @@
+Copyright Notice and Statement for PyTables Software Library and Utilities:
+
+Copyright (c) 2002-2004 by Francesc Alted
+Copyright (c) 2005-2007 by Carabos Coop. V.
+Copyright (c) 2008-2010 by Francesc Alted
+Copyright (c) 2011-2025 by PyTables maintainers
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the
+   distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/LICENSES/BLOSC.txt b/LICENSES/BLOSC.txt
new file mode 100644
index 0000000..d9b364e
--- /dev/null
+++ b/LICENSES/BLOSC.txt
@@ -0,0 +1,22 @@
+Blosc - A blocking, shuffling and lossless compression library
+
+Copyright (C) 2009-2012 Francesc Alted
+Copyright (C) 2013 Francesc Alted
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/LICENSES/CLOUD-SPTHEME.txt b/LICENSES/CLOUD-SPTHEME.txt new file mode 100644 index 0000000..3144724 --- /dev/null +++ b/LICENSES/CLOUD-SPTHEME.txt @@ -0,0 +1,47 @@ +.. -*- restructuredtext -*- + +=================== +Copyright & License +=================== + +Cloud Sphinx Theme +================== +cloud_sptheme is released under the BSD license, +and is (c) `Assurance Technologies `_:: + + The "cloud_sptheme" python package and artwork is + Copyright (c) 2010-2012 by Assurance Technologies, LLC. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Assurance Technologies, nor the names of the + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Other Content +============= +Most of the icons in ``cloud_sptheme:themes/cloud/static`` +are from the `Tango Icon Project `_, +which has released them into the Public Domain. 
diff --git a/LICENSES/FASTLZ.txt b/LICENSES/FASTLZ.txt new file mode 100644 index 0000000..1e207ac --- /dev/null +++ b/LICENSES/FASTLZ.txt @@ -0,0 +1,23 @@ +FastLZ - lightning-fast lossless compression library + +Copyright (C) 2007 Ariya Hidayat (ariya@kde.org) +Copyright (C) 2006 Ariya Hidayat (ariya@kde.org) +Copyright (C) 2005 Ariya Hidayat (ariya@kde.org) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/LICENSES/H5PY.txt b/LICENSES/H5PY.txt new file mode 100644 index 0000000..bbd74ef --- /dev/null +++ b/LICENSES/H5PY.txt @@ -0,0 +1,33 @@ +Copyright Notice and Statement for the h5py Project + +Copyright (c) 2008 Andrew Collette +http://www.h5py.org +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +a. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +b. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +c. Neither the name of the author nor the names of contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/HDF5.txt b/LICENSES/HDF5.txt new file mode 100644 index 0000000..54126de --- /dev/null +++ b/LICENSES/HDF5.txt @@ -0,0 +1,69 @@ +HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 2006-2007 by The HDF Group (THG). + +NCSA HDF5 (Hierarchical Data Format 5) Software Library and Utilities +Copyright 1998-2006 by the Board of Trustees of the University of Illinois. 
+ +All rights reserved. + +Contributors: National Center for Supercomputing Applications (NCSA) +at the University of Illinois, Fortner Software, Unidata Program +Center (netCDF), The Independent JPEG Group (JPEG), Jean-loup Gailly +and Mark Adler (gzip), and Digital Equipment Corporation (DEC). + +Redistribution and use in source and binary forms, with or without +modification, are permitted for any purpose (including commercial +purposes) provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright +notice, this list of conditions, and the following disclaimer. + 2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions, and the following +disclaimer in the documentation and/or materials provided with the +distribution. + 3. In addition, redistributions of modified forms of the source or +binary code must carry prominent notices stating that the original +code was changed and the date of the change. + 4. All publications or advertising materials mentioning features or +use of this software are asked, but not required, to acknowledge that +it was developed by The HDF Group and by the National Center for +Supercomputing Applications at the University of Illinois at +Urbana-Champaign and credit the contributors. + 5. Neither the name of The HDF Group, the name of the University, +nor the name of any Contributor may be used to endorse or promote +products derived from this software without specific prior written +permission from THG, the University, or the Contributor, respectively. + +DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE HDF GROUP (THG) AND THE +CONTRIBUTORS "AS IS" WITH NO WARRANTY OF ANY KIND, EITHER EXPRESSED OR +IMPLIED. In no event shall THG or the Contributors be liable for any +damages suffered by the users arising out of the use of this software, +even if advised of the possibility of such damage. + +Portions of HDF5 were developed with support from the University of +California, Lawrence Livermore National Laboratory (UC LLNL). The +following statement applies to those portions of the product and must +be retained in any redistribution of source code, binaries, +documentation, and/or accompanying materials: + +This work was partially produced at the University of California, +Lawrence Livermore National Laboratory (UC LLNL) under contract +no. W-7405-ENG-48 (Contract 48) between the U.S. Department of Energy +(DOE) and The Regents of the University of California (University) for +the operation of UC LLNL. + +DISCLAIMER: This work was prepared as an account of work sponsored by +an agency of the United States Government. Neither the United States +Government nor the University of California nor any of their +employees, makes any warranty, express or implied, or assumes any +liability or responsibility for the accuracy, completeness, or +usefulness of any information, apparatus, product, or process +disclosed, or represents that its use would not infringe privately- +owned rights. Reference herein to any specific commercial products, +process, or service by trade name, trademark, manufacturer, or +otherwise, does not necessarily constitute or imply its endorsement, +recommendation, or favoring by the United States Government or the +University of California. 
The views and opinions of authors expressed +herein do not necessarily state or reflect those of the United States +Government or the University of California, and shall not be used for +advertising or product endorsement purposes. diff --git a/LICENSES/LZ4.txt b/LICENSES/LZ4.txt new file mode 100644 index 0000000..a758a82 --- /dev/null +++ b/LICENSES/LZ4.txt @@ -0,0 +1,31 @@ +LZ4 - Fast LZ compression algorithm + +Copyright (C) 2011-2014, Yann Collet. +BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html +- LZ4 source repository : http://code.google.com/p/lz4/ diff --git a/LICENSES/SNAPPY.txt b/LICENSES/SNAPPY.txt new file mode 100644 index 0000000..8d6bd9f --- /dev/null +++ b/LICENSES/SNAPPY.txt @@ -0,0 +1,28 @@ +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/STDINT.txt b/LICENSES/STDINT.txt new file mode 100644 index 0000000..8479b16 --- /dev/null +++ b/LICENSES/STDINT.txt @@ -0,0 +1,26 @@ +Copyright (c) 2006-2013 Alexander Chemeris + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. Neither the name of the product nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/LICENSES/WIN32PTHREADS.txt b/LICENSES/WIN32PTHREADS.txt new file mode 100644 index 0000000..bd5ced5 --- /dev/null +++ b/LICENSES/WIN32PTHREADS.txt @@ -0,0 +1,19 @@ +Copyright (C) 2009 Andrzej K. Haczewski + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/LICENSES/ZLIB.txt b/LICENSES/ZLIB.txt new file mode 100644 index 0000000..5d74f5c --- /dev/null +++ b/LICENSES/ZLIB.txt @@ -0,0 +1,22 @@ +Copyright notice: + + (C) 1995-2013 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu diff --git a/LICENSES/ZSTD.TXT b/LICENSES/ZSTD.TXT new file mode 100644 index 0000000..a793a80 --- /dev/null +++ b/LICENSES/ZSTD.TXT @@ -0,0 +1,30 @@ +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..0b33b8c --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,30 @@ +include MANIFEST.in +include *.txt THANKS README.rst +include setup.py Makefile +recursive-exclude * * + +recursive-include tables *.py *.pyx *.pxd +recursive-exclude tables *.c +recursive-include tables/tests *.h5 *.mat +recursive-include tables/nodes/tests *.h5 *.dat *.xbm +recursive-include src *.c *.h +recursive-include tables.egg-info * + +include hdf5-blosc/src/blosc_filter.? +include hdf5-blosc2/src/blosc2_filter.? 
+recursive-include c-blosc/blosc *.c *.h *.inc +recursive-include c-blosc/internal-complibs *.c *.cc *.h + +recursive-include LICENSES * +recursive-include utils * +include doc/Makefile doc/make.bat +recursive-include doc *.rst *.conf *.py *.*_t +recursive-include doc *.html *.js *.css *.png *.ico +recursive-include doc/source *.pdf objecttree.svg +#recursive-include doc/source *.pdf *.svg +recursive-include doc/scripts *.py +recursive-include doc/sphinxext * +recursive-exclude doc/build * +recursive-include examples *.py *.sh +recursive-include bench *.sh *.py *.txt *.h5 *.gnuplot +recursive-include contrib README *.py diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..e5720d5 --- /dev/null +++ b/Makefile @@ -0,0 +1,82 @@ +# This Makefile is only intended to prepare for distribution the PyTables +# sources exported from a repository. For building and installing PyTables, +# please use ``setup.py`` as described in the ``README.rst`` file. + +VERSION = $(shell grep "__version__ =" tables/_version.py | cut -f 3 -d ' ' | sed s/\"//g) +SRCDIRS = doc +GENERATED = ANNOUNCE.txt +PYTHON = python3 +PYPLATFORM = $(shell $(PYTHON) -c "from sysconfig import get_platform; print(get_platform())") +PYVER = $(shell $(PYTHON) -c "import sys; print(sys.implementation.cache_tag)") +PYBUILDDIR = $(PWD)/build/lib.$(PYPLATFORM)-$(PYVER) +OPT = PYTHONPATH="$(PYBUILDDIR)" +MD5SUM = md5sum + + +.PHONY: default dist sdist build check heavycheck clean distclean html latex requirements + +default: $(GENERATED) build + +dist: sdist html latex + cp RELEASE_NOTES.rst dist/RELEASE_NOTES-$(VERSION).rst + cp doc/build/latex/usersguide-$(VERSION).pdf dist/pytablesmanual-$(VERSION).pdf + tar cvzf dist/pytablesmanual-$(VERSION)-html.tar.gz doc/html + cd dist && \ + $(MD5SUM) -b tables-$(VERSION).tar.gz RELEASE_NOTES-$(VERSION).rst \ + pytablesmanual-$(VERSION).pdf \ + pytablesmanual-$(VERSION)-html.tar.gz > pytables-$(VERSION).md5 && \ + cd - + +sdist: $(GENERATED) + # $(RM) -r MANIFEST tables/__pycache__ tables/*/__pycache__ + # $(RM) tables/_comp_*.c tables/*extension.c + # $(RM) tables/*.so + $(PYTHON) -m build --sdist + +clean: + $(RM) -r MANIFEST build dist tmp tables/__pycache__ doc/_build + $(RM) bench/*.h5 bench/*.prof + $(RM) -r examples/*.h5 examples/raw + $(RM) -r *.egg-info + $(RM) $(GENERATED) tables/*.so a.out + find . '(' -name '*.py[co]' -o -name '*~' ')' -exec rm '{}' ';' + for srcdir in $(SRCDIRS) ; do $(MAKE) $(OPT) -C $$srcdir $@ ; done + +distclean: clean + $(RM) tables/_comp_*.c tables/*extension.c + $(RM) doc/usersguide-*.pdf + $(RM) -r doc/html + $(RM) -r .pytest_cache .mypy_cache + # git clean -fdx + +html: build + $(MAKE) $(OPT) -C doc html + $(RM) -r doc/html + cp -R doc/build/html doc/html + +latex: build + $(MAKE) $(OPT) -C doc latexpdf + $(RM) doc/usersguide-*.pdf + cp doc/build/latex/usersguide-$(VERSION).pdf doc + +ANNOUNCE.txt: ANNOUNCE.txt.in tables/__init__.py + cat "$<" | sed -e 's/@VERSION@/$(VERSION)/g' > "$@" + +build: + $(PYTHON) setup.py build + +check: build + # cd build/lib.* && env PYTHONPATH=. $(PYTHON) -m pytest --doctest-only --pyargs tables -k "not AttributeSet" + cd build/lib.* && env PYTHONPATH=. $(PYTHON) tables/tests/test_all.py + +heavycheck: build + cd build/lib.* && env PYTHONPATH=. 
$(PYTHON) tables/tests/test_all.py --heavy
+
+requirements: \
+	requirements.txt \
+	requirements-docs.txt \
+	.github/workflows/requirements/build-requirements.txt \
+	.github/workflows/requirements/wheels-requirements.txt
+
+%.txt: %.in
+	pip-compile -U --allow-unsafe --generate-hashes --strip-extras $<
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..2926698
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,47 @@
+Metadata-Version: 2.1
+Name: tables
+Version: 3.10.2
+Summary: Hierarchical datasets for Python
+Author: Francesc Alted, Ivan Vilata, Antonio Valentino, Anthony Scopatz, et al.
+Author-email: pytables@pytables.org
+Maintainer-email: PyTables maintainers
+License: BSD 3-Clause License
+Project-URL: homepage, http://www.pytables.org
+Project-URL: documentation, http://www.pytables.org
+Project-URL: repository, https://github.com/PyTables/PyTables
+Project-URL: changelog, http://www.pytables.org/release_notes.html
+Project-URL: tracker, https://github.com/PyTables/PyTables/issues
+Keywords: hdf5
+Platform: any
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Information Technology
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Operating System :: Unix
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Database
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.11
+Description-Content-Type: text/x-rst
+License-File: LICENSE.txt
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: numexpr>=2.6.2
+Requires-Dist: packaging
+Requires-Dist: py-cpuinfo
+Requires-Dist: blosc2>=2.3.0
+Requires-Dist: typing-extensions>=4.4.0
+
+PyTables is a package for managing hierarchical datasets,
+designed to efficiently cope with extremely large amounts of
+data. PyTables is built on top of the HDF5 library and the
+NumPy package and features an object-oriented interface
+that, combined with C code generated from Cython sources,
+makes it a fast, yet extremely easy-to-use tool for
+interactively saving and retrieving large amounts of data.
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..a75b7e7
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,135 @@
+===========================================
+ PyTables: hierarchical datasets in Python
+===========================================
+
+.. image:: https://badges.gitter.im/Join%20Chat.svg
+   :alt: Join the chat at https://gitter.im/PyTables/PyTables
+   :target: https://gitter.im/PyTables/PyTables
+
+.. image:: https://github.com/PyTables/PyTables/workflows/CI/badge.svg
+   :target: https://github.com/PyTables/PyTables/actions?query=workflow%3ACI
+
+.. image:: https://img.shields.io/pypi/v/tables.svg
+   :target: https://pypi.org/project/tables/
+
+.. image:: https://img.shields.io/pypi/pyversions/tables.svg
+   :target: https://pypi.org/project/tables/
+
+.. image:: https://img.shields.io/pypi/l/tables
+   :target: https://github.com/PyTables/PyTables/
+
+
+:URL: http://www.pytables.org/
+
+
+PyTables is a package for managing hierarchical datasets, designed
+to efficiently cope with extremely large amounts of data.
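+
+As a quick taste (a minimal sketch; the file name and record layout
+are invented for this example), creating and filling a table looks
+like::
+
+    import tables as tb
+
+    class Particle(tb.IsDescription):
+        name = tb.StringCol(16)
+        energy = tb.Float64Col()
+
+    with tb.open_file("demo.h5", "w") as h5:
+        table = h5.create_table("/", "particles", Particle)
+        row = table.row
+        for i in range(10):
+            row["name"] = f"p{i}"
+            row["energy"] = float(i)
+            row.append()  # queue the current row for writing
+        table.flush()  # write the queued rows to disk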
+
+PyTables is built on top of the HDF5 library and the NumPy package. It
+features an object-oriented interface that, combined with C extensions
+for the performance-critical parts of the code (generated using
+Cython), makes it a fast, yet extremely easy-to-use tool for
+interactively saving and retrieving very large amounts of data. One
+important feature of PyTables is that it optimizes memory and disk
+resources so that data takes much less space (between 3 and 5 times
+less, and more if the data is compressible) than other solutions,
+such as relational or object-oriented databases.
+
+State-of-the-art compression
+----------------------------
+
+PyTables supports the `Blosc compressor `_ out of the box.
+This allows for extremely high compression speed, while keeping decent
+compression ratios. By doing so, I/O can be accelerated to a large extent,
+and you may end up achieving higher performance than the raw bandwidth
+provided by your I/O subsystem. See the
+`Tuning The Chunksize section of the Optimization Tips chapter
+`_
+of the user documentation for some benchmarks.
+
+Not an RDBMS replacement
+------------------------
+
+PyTables is not designed to work as a relational database replacement,
+but rather as a teammate. If you want to work with large datasets of
+multidimensional data (for example, for multidimensional analysis), or
+just provide a categorized structure for some portions of your
+cluttered RDBMS, then give PyTables a try. It works well for storing
+data from data acquisition systems, simulation software, network
+data monitoring systems (for example, traffic measurements of IP
+packets on routers), or as a centralized repository for system logs,
+to name only a few possible use cases.
+
+Tables
+------
+
+A table is defined as a collection of records whose values are stored
+in fixed-length fields. All records have the same structure, and all
+values in each field have the same data type. The terms "fixed-length"
+and strict "data types" may seem a strange requirement for an
+interpreted language like Python, but they serve a useful function if
+the goal is to save very large quantities of data (such as that
+generated by many scientific applications, for example) in an
+efficient manner that reduces demand on CPU time and I/O.
+
+Arrays
+------
+
+There are other useful objects, like arrays, enlargeable arrays, and
+variable-length arrays, that cover different use cases in your
+project.
+
+Easy to use
+-----------
+
+One of the principal objectives of PyTables is to be user-friendly.
+In addition, many different iterators have been implemented to
+make interactive work as productive as possible.
+
+Platforms
+---------
+
+We use Linux on top of Intel32 and Intel64 boxes as the main
+development platforms, but PyTables should be easy to compile/install
+on other UNIX (including macOS) or Windows machines.
+
+Compiling
+---------
+
+To compile PyTables, you will need a recent version of the HDF5
+(C flavor) library, the Zlib compression library, and the NumPy and
+Numexpr packages. In addition, PyTables comes with support for the
+Blosc, LZO, and bzip2 compressor libraries. Blosc is mandatory, but
+PyTables ships with the Blosc sources, so although it is recommended
+to have Blosc installed on your system, you don't absolutely need to
+install it separately. The LZO and bzip2 compression libraries are,
+however, optional.
+
+Make sure you have HDF5 version 1.10.5 or above.
On Debian-based Linux
+distributions, you can install it with::
+
+  $ sudo apt install libhdf5-serial-dev
+
+Installation
+------------
+
+1. Install with `pip `_::
+
+     $ python3 -m pip install tables
+
+2. To run the test suite::
+
+     $ python3 -m tables.tests.test_all
+
+   If any test does not pass, please send us the
+   complete output using the
+   `GitHub Issue Tracker `_.
+
+
+**Enjoy data!** -- The PyTables Team
+
+.. Local Variables:
+.. mode: text
+.. coding: utf-8
+.. fill-column: 70
+.. End:
diff --git a/THANKS b/THANKS
new file mode 100644
index 0000000..662f929
--- /dev/null
+++ b/THANKS
@@ -0,0 +1,85 @@
+March 2009
+
+We would like to thank the people who have contributed directly or
+indirectly to PyTables.
+
+Scott Prater for editing the user's manual in order to make it more
+readable in English, as well as conducting the tests of PyTables on
+Solaris.
+
+Alan McIntyre for porting PyTables to Windows.
+
+John Nielsen for suggesting improvements and delivering code to
+completely avoid the recursion algorithms, allowing PyTables to
+bypass the ~1000 levels of depth that the Python recursion limit
+imposed.
+
+Tom Hedley for providing a nice patch for supporting complex datatypes
+for Arrays, EArrays and VLArrays. This was the root for the support of
+complex types in Tables as well.
+
+Shack Toms for providing a Python version of the nextafter and
+nextafterf math functions which, despite being part of the C99
+standard, are not at the official places in Microsoft VC++ 6.x
+nor VC++ 7.x.
+
+Jeff Whitaker for providing the NetCDF module and the utility for
+converting netCDF files to HDF5 (nctoh5).
+
+Norbert Nemec for providing several interesting patches.
+
+Andrew Straw for suggesting to bracket the most intensive parts of
+PyTables with BEGIN_ALLOW_THREADS and END_ALLOW_THREADS. This allows
+much better performance of PyTables apps on multiprocessor platforms.
+
+Antonio Valentino for providing several patches for supporting
+native multidimensional attributes and the CArray object.
+
+Ashley Walsh, for reporting several problems and fixes. This helped
+in testing the OSX platform, especially UCL compressor issues.
+
+Russel Howe, for reporting and providing an initial patch for a nasty
+memory leak when reading VLArray types.
+
+The HDF5 team at NCSA for making such an excellent library for data
+persistence, and especially Pedro Vicente, Quincey Koziol and Elena
+Pourmal, for quickly including my suggested patches to the HDF5_HL and
+solving the reported bugs in the HDF5 library.
+
+Todd Miller and Perry Greenfield for promptly helping me to understand
+many of the intricacies of the numarray package and Jin-chung Hsu for
+discussions on the recarray module (now the numarray.records module).
+They have been very receptive and promptly worked out most of the
+improvements in numarray (especially in the records module) that were
+necessary for PyTables.
+
+Travis Oliphant for his impressive work and responsiveness with NumPy.
+
+Evan Prodromou for his lrucache package, a very sleek implementation
+of an LRU queue. He had a very helpful attitude with the licensing
+and technical issues.
+
+Gerard Vermeulen for Windows/MSVS-2005 testing.
+
+Enric Cervera for testing the binaries for MacOSX/Intel.
+
+Daniel Bungert, Steve Langasek and Alexandre Fayolle for their support
+in creating Debian packages for PyTables.
+
+Greg Ewing for writing the excellent Pyrex tool, allowing beginners
+like me to quickly and safely start writing Python extensions.
He was
+also very responsive about questions on Pyrex.
+
+Stefan Behnel, Robert Bradshaw, and Dag Sverre Seljebotn for their
+impressive work with Cython.
+
+Andrew Collette, for his excellent work on the h5py project, from
+which PyTables started to steal ideas (and code too ;-).
+
+Guido, you know who.
+
+And last, but definitely not least!,
+
+To those companies that are supporting the PyTables project with
+contracts.
diff --git a/bench/100-trillion-baby.py b/bench/100-trillion-baby.py
new file mode 100644
index 0000000..c926123
--- /dev/null
+++ b/bench/100-trillion-baby.py
@@ -0,0 +1,87 @@
+import sys
+from time import time
+
+import numpy as np
+
+import tables as tb
+
+N = 9  # 9 billion rows
+# N = 1000  # 1 trillion rows
+# N = 100_000  # 100 trillion rows
+NBUNCH = 1_000_000_000
+NREADS = 100_000
+filename = "100-trillion-baby.h5"
+
+
+class Particle(tb.IsDescription):
+    lat = tb.Int32Col()  # integer
+    lon = tb.Int32Col()  # integer
+    time = tb.Int32Col()  # integer
+    precip = tb.Float32Col()  # float
+    solar = tb.Float32Col()  # float
+    air = tb.Float32Col()  # float
+    snow = tb.Float32Col()  # float
+    wind = tb.Float32Col()  # float
+
+
+if len(sys.argv) > 1 and sys.argv[1] == "w":
+    # Open a file in "w"rite mode
+    print(f"Creating table with {NBUNCH * N // 1_000_000_000} Grows...", end="")
+    t0 = time()
+    fileh = tb.open_file(filename, mode="w", pytables_sys_attrs=False)
+    # Create a new table in the root group
+    table = fileh.create_table(
+        fileh.root,
+        "table",
+        Particle,
+        "100 trillion rows baby",
+        tb.Filters(complevel=1, complib="blosc2:zstd", shuffle=1),
+        expectedrows=NBUNCH * N,
+    )
+    # chunkshape=2**20)
+    # A bunch of particles
+    particles = np.zeros(NBUNCH, dtype=table.dtype)
+
+    # Fill the table with N chunks of particles
+    for i in range(N):
+        table.append(particles)
+    table.flush()
+    t = time() - t0
+    print(
+        f"\t{t:.3f}s ({table.nrows * table.dtype.itemsize / t / 2**30:.1f} GB/s)"
+    )
+    fileh.close()
+
+fileh = tb.open_file(filename, "r")
+table = fileh.root.table
+
+t0 = time()
+idxs_to_read = np.random.randint(0, N * NBUNCH, NREADS)
+# print(f"Time to create indexes: {time() - t0:.3f}s")
+
+print(f"Random read of {NREADS // 1_000} Krows...", end="")
+t0 = time()
+for i in idxs_to_read:
+    row = table[i]
+t = time() - t0
+print(f"\t{t:.3f}s ({t / NREADS * 1e6:.1f} us/read)")
+
+# print(f"Serial read of {table.nrows // 1_000_000_000} Grows...", end="")
+# t0 = time()
+# nrows_chunk = table.chunkshape[0]
+# nchunks = table.nrows // nrows_chunk
+# for i in range(nchunks):
+#     chunk = table.read(i * nrows_chunk, (i + 1) * nrows_chunk)
+# t = time() - t0
+# print(f"\t{t:.3f}s ({table.nrows * table.dtype.itemsize / t / 2**30:.1f} GB/s)")
+
+print(f"Query of {table.nrows // 1_000_000_000} Grows...", end="")
+t0 = time()
+res = sum(x["precip"] for x in table.where("(lat > 10)"))
+# res = sum(1 for x in table.where("(lat > 10)"))
+t = time() - t0
+print(
+    f"\t\t{t:.3f}s ({table.nrows * table.dtype.itemsize / t / 2**30:.1f} GB/s)"
+)
+
+fileh.close()
diff --git a/bench/LRU-experiments.py b/bench/LRU-experiments.py
new file mode 100644
index 0000000..b2389b1
--- /dev/null
+++ b/bench/LRU-experiments.py
@@ -0,0 +1,100 @@
+# Testbed to perform experiments in order to determine best values for
+# the node numbers in LRU cache. Tables version.
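+#
+# Note: the number of node cache slots can be chosen per file through
+# the node_cache_slots argument of tb.open_file() (its default comes
+# from tables.parameters.NODE_CACHE_SLOTS), for instance:
+#
+#     fileh = tb.open_file(filename, "a", node_cache_slots=256)
+#
+# The experiments below simply use the default value.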
+
+from time import perf_counter as clock
+
+import tables as tb
+
+print("PyTables version-->", tb.__version__)
+
+filename = "/tmp/junk-tables-100.h5"
+NLEAVES = 2000
+NROWS = 1000
+
+
+class Particle(tb.IsDescription):
+    name = tb.StringCol(16, pos=1)  # 16-character String
+    lati = tb.Int32Col(pos=2)  # integer
+    longi = tb.Int32Col(pos=3)  # integer
+    pressure = tb.Float32Col(pos=4)  # float (single-precision)
+    temperature = tb.Float64Col(pos=5)  # double (double-precision)
+
+
+def create_junk():
+    # Open a file in "w"rite mode
+    fileh = tb.open_file(filename, mode="w")
+    # Create a new group
+    group = fileh.create_group(fileh.root, "newgroup")
+
+    for i in range(NLEAVES):
+        # Create a new table in newgroup group
+        table = fileh.create_table(
+            group, "table" + str(i), Particle, "A table", tb.Filters(1)
+        )
+        particle = table.row
+        print("Creating table-->", table._v_name)
+
+        # Fill the table with default-valued particles
+        for _ in range(NROWS):
+            # Append the current row (all fields keep their defaults)
+            particle.append()
+        table.flush()
+
+    # Finally, close the file
+    fileh.close()
+
+
+def modify_junk_lru():
+    fileh = tb.open_file(filename, "a")
+    group = fileh.root.newgroup
+    for j in range(5):
+        print("iter -->", j)
+        for tt in fileh.walk_nodes(group):
+            if isinstance(tt, tb.Table):
+                pass
+                # for row in tt:
+                #     pass
+    fileh.close()
+
+
+def modify_junk_lru2():
+    fileh = tb.open_file(filename, "a")
+    group = fileh.root.newgroup
+    for j in range(20):
+        t1 = clock()
+        for i in range(100):
+            # print("table-->", tt._v_name)
+            _ = getattr(group, "table" + str(i))
+            # for row in _:
+            #     pass
+        print(f"iter and time --> {j + 1} {clock() - t1:.3f}")
+    fileh.close()
+
+
+def modify_junk_lru3():
+    fileh = tb.open_file(filename, "a")
+    group = fileh.root.newgroup
+    for j in range(3):
+        t1 = clock()
+        for tt in fileh.walk_nodes(group, "Table"):
+            tt.attrs.TITLE  # touch an attribute to force loading the node
+            for row in tt:
+                pass
+        print(f"iter and time --> {j + 1} {clock() - t1:.3f}")
+    fileh.close()
+
+
+if 1:
+    # create_junk()
+    # modify_junk_lru()  # uses the iterator version (walk_nodes)
+    # modify_junk_lru2()  # uses a regular loop (getattr)
+    modify_junk_lru3()  # uses walk_nodes and reads attributes and rows
+else:
+    import pstats
+    import profile
+
+    profile.run("modify_junk_lru2()", "modify.prof")
+    stats = pstats.Stats("modify.prof")
+    stats.strip_dirs()
+    stats.sort_stats("time", "calls")
+    stats.print_stats()
diff --git a/bench/LRU-experiments2.py b/bench/LRU-experiments2.py
new file mode 100644
index 0000000..48bc784
--- /dev/null
+++ b/bench/LRU-experiments2.py
@@ -0,0 +1,59 @@
+# Testbed to perform experiments in order to determine best values for
+# the node numbers in LRU cache. Arrays version.
+
+from time import perf_counter as clock
+
+import tables as tb
+
+print("PyTables version-->", tb.__version__)
+
+filename = "/tmp/junk-array.h5"
+NOBJS = 1000
+
+
+def create_junk():
+    fileh = tb.open_file(filename, mode="w")
+    for i in range(NOBJS):
+        fileh.create_array(fileh.root, "array" + str(i), [1])
+    fileh.close()
+
+
+def modify_junk_lru():
+    fileh = tb.open_file(filename, "a")
+    group = fileh.root
+    for j in range(5):
+        print("iter -->", j)
+        for tt in fileh.walk_nodes(group):
+            if isinstance(tt, tb.Array):
+                # d = tt.read()
+                pass
+
+    fileh.close()
+
+
+def modify_junk_lru2():
+    fileh = tb.open_file(filename, "a")
+    group = fileh.root
+    for j in range(5):
+        t1 = clock()
+        for i in range(100):  # the number of nodes to revisit
+            # print("table-->", tt._v_name)
+            _ = getattr(group, "array" + str(i))
+            # d = _.read()
+        print(f"iter and time --> {j + 1} {clock() - t1:.3f}")
+    fileh.close()
+
+
+if 1:
+    # create_junk()
+    # modify_junk_lru()  # uses the iterator version (walk_nodes)
+    modify_junk_lru2()  # uses a regular loop (getattr)
+else:
+    import pstats
+    import profile
+
+    profile.run("modify_junk_lru2()", "modify.prof")
+    stats = pstats.Stats("modify.prof")
+    stats.strip_dirs()
+    stats.sort_stats("time", "calls")
+    stats.print_stats()
diff --git a/bench/LRUcache-node-bench.py b/bench/LRUcache-node-bench.py
new file mode 100644
index 0000000..f2a1655
--- /dev/null
+++ b/bench/LRUcache-node-bench.py
@@ -0,0 +1,83 @@
+import sys
+from time import perf_counter as clock
+
+import numpy as np
+
+import tables as tb
+
+# import psyco
+
+filename = "/tmp/LRU-bench.h5"
+nodespergroup = 250
+niter = 100
+
+print("nodespergroup:", nodespergroup)
+print("niter:", niter)
+
+if len(sys.argv) > 1:
+    NODE_CACHE_SLOTS = int(sys.argv[1])
+    print("NODE_CACHE_SLOTS:", NODE_CACHE_SLOTS)
+else:
+    NODE_CACHE_SLOTS = tb.parameters.NODE_CACHE_SLOTS
+f = tb.open_file(filename, "w", node_cache_slots=NODE_CACHE_SLOTS)
+g = f.create_group("/", "NodeContainer")
+print("Creating nodes")
+for i in range(nodespergroup):
+    f.create_array(g, "arr%d" % i, [i])
+f.close()
+
+f = tb.open_file(filename)
+
+
+def iternodes():
+    # for a in f.root.NodeContainer:
+    #     pass
+    indices = np.random.randn(nodespergroup * niter) * 30 + nodespergroup / 2
+    indices = indices.astype("i4").clip(0, nodespergroup - 1)
+    g = f.get_node("/", "NodeContainer")
+    for i in indices:
+        _ = f.get_node(g, "arr%d" % i)
+        # print("a-->", _)
+
+
+print("reading nodes...")
+# First iteration (put in LRU cache)
+t1 = clock()
+for a in f.root.NodeContainer:
+    pass
+print(f"time (init cache)--> {clock() - t1:.3f}")
+
+
+def time_lru():
+    # Next iterations; note that iternodes() already performs niter passes
+    # of nodespergroup lookups, hence the division by niter below.
+    t1 = clock()
+    # for i in range(niter):
+    #     iternodes()
+    iternodes()
+    print(f"time (from cache)--> {(clock() - t1) / niter:.3f}")
+
+
+def profile(verbose=False):
+    import pstats
+    import cProfile
+
+    cProfile.run("time_lru()", "out.prof")
+    stats = pstats.Stats("out.prof")
+    stats.strip_dirs()
+    stats.sort_stats("time", "calls")
+    if verbose:
+        stats.print_stats()
+    else:
+        stats.print_stats(20)
+
+
+# profile()
+# psyco.bind(time_lru)
+time_lru()
+
+f.close()
+
+# for N in 0 4 8 16 32 64 128 256 512 1024 2048 4096; do
+#     env PYTHONPATH=../build/lib.linux-x86_64-2.7 \
+#         python LRUcache-node-bench.py $N;
+# done
diff --git a/bench/b2nd_compare_getslice.py b/bench/b2nd_compare_getslice.py
new file mode 100644
index 0000000..6aa8e16
--- /dev/null
+++ b/bench/b2nd_compare_getslice.py
@@ -0,0 +1,284 @@
+#######################################################################
+# Copyright (c) 2019-present, Blosc
Development Team +# Copyright (c) 2023, PyTables Developers +# All rights reserved. +# +# This source code is licensed under a BSD-style license (found in the +# LICENSE file in the root directory of this source tree) +####################################################################### + +# Benchmark for comparing speeds of getitem of hyperplanes on a +# multidimensional array and using different backends: +# blosc2, PyTables optimized, PyTables filter, and HDF5 +# In brief, each approach has its own strengths and weaknesses. +# +# Usage: pass any argument for testing the in-memory backends. +# Else, only persistent containers will be tested. +# +# Uncomment the desired "chunks" value below and run the script +# to get the results with the given chunk size. +# Blosc2 results are only kept as a reference. +# +# Based on "bench/ndarray/compare_getslice.py" from python-blosc2. + +import os +import sys +import math +from time import time + +import h5py +import numpy as np +import blosc2 +import hdf5plugin + +import tables + +persistent = True if len(sys.argv) == 1 else False +if persistent: + print("Testing the persistent backends") +else: + print("Testing the in-memory backends") + +# ## Benchmark parameters + +# Dimensions and type properties for the arrays +# 3D +# shape = (1000, 2000, 250) +# chunks = (50, 500, 50) +# blocks = (10, 100, 25) + +# 4D +shape = (50, 100, 300, 250) +# (uncomment the desired one) +# chunks = (10, 25, 50, 50) # PARAM: small chunk +chunks = (10, 25, 150, 100) # PARAM: big chunk (fits in 32M L3) +blocks = (10, 25, 32, 32) + +# Smaller sizes (for quick testing) +# shape = (100, 200, 250) +# chunks = (50, 50, 50) +# blocks = (10, 10, 25) + +# shape = (50, 100, 30, 25) +# chunks = (10, 25, 20, 5) +# blocks = ( 3, 5, 10, 2) + +dtype = np.dtype(np.int64) + +dset_size = math.prod(shape) * dtype.itemsize +# Compression properties +# (LZ4/8 provides a blocksize which fits in 2M L2, +# see "examples/get_blocksize.c" in C-Blosc2) +clevel = 8 +cname = "lz4" +b2_filters = [blosc2.Filter.SHUFFLE] +cparams = { + "codec": blosc2.Codec.LZ4, + "clevel": clevel, + "filters": b2_filters, + "filters_meta": [0], +} +tables_filters = tables.Filters( + complevel=clevel, complib="blosc2:%s" % cname, shuffle=True +) +h5py_filters = hdf5plugin.Blosc2( + clevel=clevel, cname=cname, filters=hdf5plugin.Blosc2.SHUFFLE +) + +print( + f"Conf: {dtype} shape={shape} chunks={chunks} blocks={blocks} " + f"cname={cname} clevel={clevel} filters={b2_filters} " + f"nthreads={os.environ.get('BLOSC_NTHREADS', '1')}" +) + +# ## No more tuning below + +blocksize = int(np.prod(blocks)) if blocks else 0 + +fname_b2nd = None +fname_tables = "tables.h5" +fname_h5py = "h5py.h5" +if persistent: + fname_b2nd = "compare_getslice.b2nd" + blosc2.remove_urlpath(fname_b2nd) + fname_tables = "compare_getslice_tables.h5" + blosc2.remove_urlpath(fname_tables) + fname_h5py = "compare_getslice_h5py.h5" + blosc2.remove_urlpath(fname_h5py) + +# Create datasets in different formats +# content = np.random.normal(0, 1, int(np.prod(shape)), dtype=dtype).reshape(shape) +# content = np.linspace(0, 1, int(np.prod(shape)), dtype=dtype).reshape(shape) +# (random numbers reduce the effect of similar values in deeper dimensions) +rng = np.random.default_rng() +size = np.prod(shape) +content = rng.integers(low=0, high=10000, size=size, dtype=dtype).reshape( + shape +) + +print("\nCreating datasets...") +# Create and fill a NDArray +t0 = time() +b2 = blosc2.empty( + shape, + dtype=content.dtype, + chunks=chunks, + blocks=blocks, + 
urlpath=fname_b2nd, + cparams=cparams, +) +b2[:] = content +t = time() - t0 +speed = dset_size / (t * 2**20) +cratio = b2.schunk.cratio +print( + f"Time for filling array (blosc2): {t:.3f} s ({speed:.2f} M/s) ; cratio: {cratio:.1f}x" +) + +# Create and fill an HDF5 array (PyTables) +t0 = time() +if persistent: + h5f = tables.open_file(fname_tables, "w") +else: + h5f = tables.open_file( + fname_tables, "w", driver="H5FD_CORE", driver_core_backing_store=0 + ) +h5ca = h5f.create_carray( + h5f.root, "carray", filters=tables_filters, chunkshape=chunks, obj=content +) +t = time() - t0 +speed = dset_size / (t * 2**20) +cratio = dset_size / h5ca.size_on_disk +print( + f"Time for filling array (hdf5, tables): {t:.3f} s ({speed:.2f} M/s) ; cratio: {cratio:.1f}x" +) + +# Create and fill an HDF5 array (h5py) +t0 = time() +if persistent: + h5pyf = h5py.File(fname_h5py, "w") +else: + h5pyf = h5py.File(fname_h5py, "w", driver="core", backing_store=False) +h5d = h5pyf.create_dataset( + "dataset", dtype=content.dtype, data=content, chunks=chunks, **h5py_filters +) +t = time() - t0 +speed = dset_size / (t * 2**20) +if persistent: + num_blocks = os.stat(fname_h5py).st_blocks + # block_size = os.statvfs(fname_h5py).f_bsize + size_on_disk = num_blocks * 512 + cratio = dset_size / size_on_disk + print( + f"Time for filling array (hdf5, h5py): {t:.3f} s ({speed:.2f} M/s) ; cratio: {cratio:.1f}x" + ) +else: + print( + f"Time for filling array (hdf5, h5py): {t:.3f} s ({speed:.2f} M/s) ; cratio: Not avail" + ) + +# Complete reads +print("\nComplete reads...") +t0 = time() +r = b2[:] +t = time() - t0 +speed = dset_size / (t * 2**20) +print(f"Time for complete read (blosc2): {t:.3f} s ({speed:.2f} M/s)") + +t0 = time() +r = h5ca[:] +t = time() - t0 +speed = dset_size / (t * 2**20) +print(f"Time for complete read (hdf5, tables): {t:.3f} s ({speed:.2f} M/s)") + +t0 = time() +r = h5d[:] +t = time() - t0 +speed = dset_size / (t * 2**20) +print(f"Time for complete read (hdf5, h5py): {t:.3f} s ({speed:.2f} M/s)") + +# Reading by slices +print("\nReading by slices...") +# The coordinates for random planes +planes_idx = np.random.randint(0, min(shape), 100) + + +def time_slices(dset, idx): + r = None + if dset.ndim == 3: + t0 = time() + if ndim == 0: + for i in idx: + r = dset[i, :, :] + elif ndim == 1: + for i in idx: + r = dset[:, i, :] + else: + for i in idx: + r = dset[:, :, i] + t = time() - t0 + size = r.size * dset.dtype.itemsize * len(idx) + return t, size / (t * 2**20) + elif dset.ndim == 4: + t0 = time() + if ndim == 0: + for i in idx: + r = dset[i, :, :, :] + elif ndim == 1: + for i in idx: + r = dset[:, i, :, :] + elif ndim == 2: + for i in idx: + r = dset[:, :, i, :] + else: + for i in idx: + r = dset[:, :, :, i] + t = time() - t0 + size = r.size * dset.dtype.itemsize * len(idx) + return t, size / (t * 2**20) + raise ValueError(f"ndim == {dset.ndim} is not supported") + + +for ndim in range(len(shape)): + print(f"Slicing in dim {ndim}...") + + # Slicing with blosc2 + t, speed = time_slices(b2, planes_idx) + print( + f"Time for reading with getitem (blosc2): {t:.3f} s ({speed:.2f} M/s)" + ) + + # Slicing with hdf5 (PyTables opt) + os.environ["BLOSC2_FILTER"] = "0" + t, speed = time_slices(h5ca, planes_idx) + print( + f"Time for reading with getitem (hdf5, tables opt): {t:.3f} s ({speed:.2f} M/s)" + ) + + # Slicing with hdf5 (PyTables noopt) + os.environ["BLOSC2_FILTER"] = "1" + t, speed = time_slices(h5ca, planes_idx) + print( + f"Time for reading with getitem (hdf5, tables noopt): {t:.3f} s ({speed:.2f} M/s)" + 
) + + # Slicing with hdf5 (h5py opt) + os.environ["BLOSC2_FILTER"] = "0" + t, speed = time_slices(h5d, planes_idx) + print( + f"Time for reading with getitem (hdf5, h5py opt): {t:.3f} s ({speed:.2f} M/s)" + ) + + # Slicing with hdf5 (h5py noopt) + os.environ["BLOSC2_FILTER"] = "1" + t, speed = time_slices(h5d, planes_idx) + print( + f"Time for reading with getitem (hdf5, h5py noopt): {t:.3f} s ({speed:.2f} M/s)" + ) + +h5f.close() +h5pyf.close() +# if persistent: +# os.remove(fname_b2nd) +# os.remove(fname_tables) +# os.remove(fname_h5py) diff --git a/bench/b2nd_compare_getslice_plot.py b/bench/b2nd_compare_getslice_plot.py new file mode 100644 index 0000000..10107e4 --- /dev/null +++ b/bench/b2nd_compare_getslice_plot.py @@ -0,0 +1,441 @@ +import numpy as np +import plotly.graph_objects as go + +# Results (in MiB/s) for each dimension in the different runs, +# for an Intel i9 13900K. +big_chunk_blosc2 = np.array( + [ + [ + 1866.36, + 947.13, + 717.50, + 725.78, + ], + [ + 1844.19, + 956.72, + 748.86, + 660.97, + ], + [ + 1855.04, + 1039.93, + 645.57, + 785.81, + ], + [ + 1815.24, + 1026.64, + 678.65, + 719.13, + ], + [ + 1886.88, + 1047.65, + 671.98, + 586.63, + ], + [ + 1889.12, + 1062.84, + 693.26, + 801.35, + ], + [ + 1845.90, + 1053.75, + 686.43, + 628.18, + ], + [ + 1872.75, + 1046.85, + 671.72, + 762.23, + ], + ] +) + +big_chunk_pt_opt = np.array( + [ + [ + 814.35, + 522.08, + 44.50, + 123.54, + ], + [ + 716.30, + 359.84, + 37.94, + 67.25, + ], + [ + 1122.58, + 297.10, + 44.73, + 71.55, + ], + [ + 1227.71, + 348.46, + 110.33, + 185.12, + ], + [ + 1404.73, + 534.54, + 60.91, + 115.45, + ], + [ + 772.77, + 324.10, + 106.43, + 140.37, + ], + [ + 1092.98, + 309.96, + 44.77, + 129.94, + ], + [ + 1332.74, + 773.50, + 85.37, + 70.42, + ], + ] +) + +big_chunk_pt_filter = np.array( + [ + [ + 269.25, + 115.39, + 11.83, + 33.68, + ], + [ + 176.66, + 114.66, + 12.10, + 20.84, + ], + [ + 290.16, + 74.36, + 14.91, + 21.98, + ], + [ + 348.39, + 120.16, + 24.97, + 41.20, + ], + [ + 288.37, + 140.70, + 22.61, + 33.95, + ], + [ + 353.88, + 116.45, + 18.57, + 41.76, + ], + [ + 363.78, + 75.88, + 12.98, + 40.41, + ], + [ + 290.85, + 144.14, + 24.14, + 23.33, + ], + ] +) + +big_chunk_h5py_opt = np.array( + [ + [ + 679.19, + 582.07, + 46.72, + 58.60, + ], + [ + 516.82, + 262.60, + 23.59, + 64.86, + ], + [ + 782.64, + 251.48, + 49.08, + 73.75, + ], + [ + 576.70, + 421.06, + 91.56, + 132.57, + ], + [ + 683.82, + 513.72, + 97.08, + 135.89, + ], + [ + 510.25, + 553.92, + 104.91, + 145.52, + ], + [ + 913.89, + 252.16, + 44.53, + 142.06, + ], + [ + 919.99, + 265.99, + 113.57, + 94.69, + ], + ] +) + +big_chunk_h5py_filter = np.array( + [ + [ + 285.21, + 116.22, + 12.05, + 20.62, + ], + [ + 175.79, + 73.17, + 12.09, + 21.07, + ], + [ + 269.96, + 76.61, + 14.56, + 22.09, + ], + [ + 363.79, + 117.53, + 18.87, + 43.37, + ], + [ + 300.72, + 168.28, + 19.79, + 33.96, + ], + [ + 341.09, + 114.78, + 24.20, + 42.06, + ], + [ + 283.69, + 76.10, + 13.05, + 41.68, + ], + [ + 313.10, + 77.62, + 23.63, + 23.16, + ], + ] +) + +small_chunk_blosc2 = np.array( + [ + [ + 1713.62, + 851.09, + 505.51, + 255.75, + ], + [ + 1733.91, + 939.15, + 478.38, + 395.56, + ], + [ + 1708.76, + 873.59, + 500.82, + 352.36, + ], + ] +) + +small_chunk_pt_opt = np.array( + [ + [ + 135.39, + 128.22, + 62.25, + 52.27, + ], + [ + 129.55, + 130.86, + 68.71, + 56.25, + ], + [ + 115.83, + 129.80, + 67.27, + 58.50, + ], + ] +) + +small_chunk_pt_filter = np.array( + [ + [ + 102.94, + 95.90, + 39.99, + 44.96, + ], + [ + 112.29, + 76.40, + 50.20, + 39.35, + 
], + [ + 119.12, + 74.92, + 46.89, + 39.45, + ], + ] +) + +small_chunk_h5py_opt = np.array( + [ + [ + 88.19, + 110.67, + 51.34, + 47.84, + ], + [ + 90.61, + 113.41, + 54.33, + 46.85, + ], + [ + 90.68, + 112.75, + 54.14, + 49.18, + ], + ] +) + +small_chunk_h5py_filter = np.array( + [ + [ + 112.20, + 90.30, + 50.97, + 57.10, + ], + [ + 103.25, + 95.34, + 40.47, + 35.66, + ], + [ + 113.42, + 89.75, + 35.20, + 45.05, + ], + ] +) + +dimensions = ["dim%d" % d for d in range(4)] +reduce = np.max # np.average + +data_big_chunk = [ + ("Python Blosc2", reduce(big_chunk_blosc2.T, axis=1)), + ("PyTables/Blosc2 filter", reduce(big_chunk_pt_filter.T, axis=1)), + ("PyTables/Blosc2 optimized", reduce(big_chunk_pt_opt.T, axis=1)), + ("h5py/Blosc2 filter", reduce(big_chunk_h5py_filter.T, axis=1)), + ("h5py/Blosc2 optimized", reduce(big_chunk_h5py_opt.T, axis=1)), +] + +data_small_chunk = [ + ("Python Blosc2", reduce(small_chunk_blosc2.T, axis=1)), + ("PyTables/Blosc2 filter", reduce(small_chunk_pt_filter.T, axis=1)), + ("PyTables/Blosc2 optimized", reduce(small_chunk_pt_opt.T, axis=1)), + ("h5py/Blosc2 filter", reduce(small_chunk_h5py_filter.T, axis=1)), + ("h5py/Blosc2 optimized", reduce(small_chunk_h5py_opt.T, axis=1)), +] + +MiB = 1024 * 1024 + +shape = (50, 100, 300, 250) +# [(chunksize, blocksize, typesize, results), ...] +data = [ + ((10, 25, 50, 50), (10, 25, 32, 32), 8, data_small_chunk), + ((10, 25, 150, 100), (10, 25, 32, 32), 8, data_big_chunk), +] + +for chunksize, blocksize, typesize, data_ in data: + fig = go.Figure( + data=[ + go.Bar( + name=d[0], + x=dimensions, + y=d[1], + # text=['%.2f' % v for v in d[1]], + ) + for d in data_ + ] + ) + # Change the bar mode + title = ( + "shape {} ({:.1f}G), chunk {} ({:.1f}M), block {} ({:.1f}M)".format( + "x".join(str(d) for d in shape), + np.prod(shape) * typesize / MiB / 1024, + "x".join(str(d) for d in chunksize), + np.prod(chunksize) * typesize / MiB, + "x".join(str(d) for d in blocksize), + np.prod(blocksize) * typesize / MiB, + ) + ) + fig.update_layout( + barmode="group", title_text=title, yaxis={"title": "Throughput (M/s)"} + ) + fig.update_layout(width=1280, height=720, font={"size": 18}) + fig.show() diff --git a/bench/bench-postgres-ranges.sh b/bench/bench-postgres-ranges.sh new file mode 100755 index 0000000..04cee3d --- /dev/null +++ b/bench/bench-postgres-ranges.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +export PYTHONPATH=..${PYTHONPATH:+:$PYTHONPATH} + +pyopt="-O -u" +#qlvl="-Q8 -x" +#qlvl="-Q8" +qlvl="-Q7" +#size="500m" +size="1g" + +#python $pyopt indexed_search.py -P -c -n $size -m -v +python $pyopt indexed_search.py -P -i -n $size -m -v -sfloat $qlvl diff --git a/bench/bench-pytables-ranges.sh b/bench/bench-pytables-ranges.sh new file mode 100755 index 0000000..34ad5ce --- /dev/null +++ b/bench/bench-pytables-ranges.sh @@ -0,0 +1,30 @@ +#!/bin/sh + +#export LD_LIBRARY_PATH=$HOME/computacio/hdf5-1.10.2/hdf5/lib +export PYTHONPATH=..${PYTHONPATH:+:$PYTHONPATH} + +bench="python2.7 -O -u indexed_search.py" +flags="-T -m -v " +#sizes="1g 500m 200m 100m 50m 20m 10m 5m 2m 1m" +sizes="1g" +#sizes="1m" +working_dir="data.nobackup" +#working_dir="/scratch2/faltet" + +#for comprlvl in '-z0' '-z1 -llzo' '-z1 -lzlib' ; do +#for comprlvl in '-z6 -lblosc' '-z3 -lblosc' '-z1 -lblosc' ; do +for comprlvl in '-z5 -lblosc' ; do +#for comprlvl in '-z0' ; do + for optlvl in '-tfull -O9' ; do + #for optlvl in '-tultralight -O3' '-tlight -O6' '-tmedium -O6' '-tfull -O9'; do + #for optlvl in '-tultralight -O3'; do + #rm -f $working_dir/* # XXX esta ben posat?? 
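+    # Each (comprlvl, optlvl, mode, size) combination below launches one
+    # full indexed_search.py run, so wide sweeps can take a long time.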
+ for mode in '-Q8 -i -s float' ; do + #for mode in -c '-Q7 -i -s float' ; do + #for mode in '-c -s float' '-Q8 -I -s float' '-Q8 -S -s float'; do + for size in $sizes ; do + $bench $flags $mode -n $size $optlvl $comprlvl -d $working_dir + done + done + done +done diff --git a/bench/bench-pytables.sh b/bench/bench-pytables.sh new file mode 100755 index 0000000..5756b3f --- /dev/null +++ b/bench/bench-pytables.sh @@ -0,0 +1,28 @@ +#!/bin/sh + +export LD_LIBRARY_PATH=$HOME/computacio/hdf5-1.10.1/hdf5/lib +#export PYTHONPATH=..${PYTHONPATH:+:$PYTHONPATH} + +bench="python2.7 -O -u indexed_search.py" +flags="-T -m -v -d data.nobackup" +#sizes="1m 2m 5m 10m 20m 50m 100m 200m 500m 1g" +sizes="2g 1g 500m 200m 100m 50m 20m 10m 5m 2m 1m 500k 200k 100k 50k 20k 10k 5k 2k 1k" +#sizes="1m 100k" + +#for optimlvl in 0 1 2 3 4 5 6 7 8 9 ; do +for idxtype in ultralight light medium full; do +#for idxtype in medium full; do + for optimlvl in 0 3 6 9; do + for compropt in '' '-z1 -lzlib' '-z1 -llzo' ; do + #for compropt in '-z1 -llzo' ; do + rm -rf data.nobackup/* # Atencio: esta correctament posat? + #for mode in -c '-i -s float' ; do + for mode in -c '-i' ; do + for size in $sizes ; do + $bench $flags $mode -n $size -O $optimlvl -t $idxtype $compropt + done + done + done + done +done +rm -rf data.nobackup diff --git a/bench/blosc.py b/bench/blosc.py new file mode 100644 index 0000000..83cf9ac --- /dev/null +++ b/bench/blosc.py @@ -0,0 +1,167 @@ +import sys +from time import perf_counter as clock +from pathlib import Path + +import numpy as np + +import tables as tb + +niter = 3 +dirname = "/scratch2/faltet/blosc-data/" +# expression = "a**2 + b**3 + 2*a*b + 3" +# expression = "a+b" +# expression = "a**2 + 2*a/b + 3" +# expression = "(a+b)**2 - (a**2 + b**2 + 2*a*b) + 1.1" +expression = "3*a-2*b+1.1" +shuffle = True + + +def create_file(kind, prec, synth): + prefix_orig = "cellzome/cellzome-" + iname = dirname + prefix_orig + "none-" + prec + ".h5" + f = tb.open_file(iname, "r") + + if prec == "single": + type_ = tb.Float32Atom() + else: + type_ = tb.Float64Atom() + + if synth: + prefix = "synth/synth-" + else: + prefix = "cellzome/cellzome-" + + for clevel in range(10): + oname = "%s/%s-%s%d-%s.h5" % (dirname, prefix, kind, clevel, prec) + # print "creating...", iname + f2 = tb.open_file(oname, "w") + + if kind in ["none", "numpy"]: + filters = None + else: + filters = tb.Filters( + complib=kind, complevel=clevel, shuffle=shuffle + ) + + for name in ["maxarea", "mascotscore"]: + col = f.get_node("/", name) + r = f2.create_carray("/", name, type_, col.shape, filters=filters) + if synth: + r[:] = np.arange(col.nrows, dtype=type_.dtype) + else: + r[:] = col[:] + f2.close() + if clevel == 0: + size = 1.5 * Path(oname).stat().st_size + f.close() + return size + + +def create_synth(kind, prec): + + prefix_orig = "cellzome/cellzome-" + iname = dirname + prefix_orig + "none-" + prec + ".h5" + f = tb.open_file(iname, "r") + + if prec == "single": + type_ = tb.Float32Atom() + else: + type_ = tb.Float64Atom() + + prefix = "synth/synth-" + for clevel in range(10): + oname = "%s/%s-%s%d-%s.h5" % (dirname, prefix, kind, clevel, prec) + # print "creating...", iname + f2 = tb.open_file(oname, "w") + + if kind in ["none", "numpy"]: + filters = None + else: + filters = tb.Filters( + complib=kind, complevel=clevel, shuffle=shuffle + ) + + for name in ["maxarea", "mascotscore"]: + col = f.get_node("/", name) + r = f2.create_carray("/", name, type_, col.shape, filters=filters) + if name == "maxarea": + r[:] = 
np.arange(col.nrows, dtype=type_.dtype)
+            else:
+                # descending ramp; without the -1 step, np.arange()
+                # would return an empty array here
+                r[:] = np.arange(col.nrows, 0, -1, dtype=type_.dtype)
+
+        f2.close()
+        if clevel == 0:
+            size = 1.5 * Path(oname).stat().st_size
+    f.close()
+    return size
+
+
+def process_file(kind, prec, clevel, synth):
+
+    if kind == "numpy":
+        lib = "none"
+    else:
+        lib = kind
+    if synth:
+        prefix = "synth/synth-"
+    else:
+        prefix = "cellzome/cellzome-"
+    iname = "%s/%s-%s%d-%s.h5" % (dirname, prefix, kind, clevel, prec)
+    f = tb.open_file(iname, "r")
+    a_ = f.root.maxarea
+    b_ = f.root.mascotscore
+
+    oname = "%s/%s-%s%d-%s-r.h5" % (dirname, prefix, kind, clevel, prec)
+    f2 = tb.open_file(oname, "w")
+    if lib == "none":
+        filters = None
+    else:
+        filters = tb.Filters(complib=lib, complevel=clevel, shuffle=shuffle)
+    if prec == "single":
+        type_ = tb.Float32Atom()
+    else:
+        type_ = tb.Float64Atom()
+    r = f2.create_carray("/", "r", type_, a_.shape, filters=filters)
+
+    if kind == "numpy":
+        a2, b2 = a_[:], b_[:]
+        t0 = clock()
+        r = eval(expression, {"a": a2, "b": b2})
+        print(f"{clock() - t0:5.2f}")
+    else:
+        expr = tb.Expr(expression, {"a": a_, "b": b_})
+        expr.set_output(r)
+        expr.eval()
+    f.close()
+    f2.close()
+    size = Path(iname).stat().st_size + Path(oname).stat().st_size
+    return size
+
+
+if __name__ == "__main__":
+    if len(sys.argv) > 3:
+        kind = sys.argv[1]
+        prec = sys.argv[2]
+        if sys.argv[3] == "synth":
+            synth = True
+        else:
+            synth = False
+    else:
+        print("3 parameters required")
+        sys.exit(1)
+
+    # print "kind, precision, synth:", kind, prec, synth
+
+    # print "Creating input files..."
+    size_orig = create_file(kind, prec, synth)
+
+    # print "Processing files for compression levels in range(10)..."
+    for clevel in range(10):
+        t0 = clock()
+        ts = []
+        for i in range(niter):
+            size = process_file(kind, prec, clevel, synth)
+            ts.append(clock() - t0)
+            t0 = clock()
+        ratio = size_orig / size
+        print(f"{min(ts):5.2f}, {ratio:5.2f}")
diff --git a/bench/bsddb-table-bench.py b/bench/bsddb-table-bench.py
new file mode 100644
index 0000000..7332e76
--- /dev/null
+++ b/bench/bsddb-table-bench.py
@@ -0,0 +1,262 @@
+#!/usr/bin/env python
+# ##### WARNING #######
+# ## This script is obsolete ###
+# If you get it working again, please drop me a line
+# F. Alted 2004-01-27
+
+import sys
+import struct
+
+import numpy as np
+import psyco
+import cPickle
+from bsddb import db
+
+import tables as tb
+
+
+# This class is accessible only for the examples
+class Small(tb.IsDescription):
+    """Record descriptor.
+
+    A record has several columns. They are represented here as class
+    attributes, whose names are the column names and their values will
+    become their types. The IsColDescr class will take care that the
+    user does not add any new variables and that their types are correct.
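+    For instance, the three attributes below declare a 16-character
+    string column, a 32-bit integer column and a 64-bit float column.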
+ + """ + + var1 = tb.StringCol(itemsize=16) + var2 = tb.Int32Col() + var3 = tb.Float64Col() + + +# Define a user record to characterize some kind of particles + + +class Medium(tb.IsDescription): + name = tb.StringCol(itemsize=16, pos=0) # 16-character String + # float1 = Float64Col(shape=2, dflt=2.3) + float1 = tb.Float64Col(dflt=1.3, pos=1) + float2 = tb.Float64Col(dflt=2.3, pos=2) + ADCcount = tb.Int16Col(pos=3) # signed short integer + grid_i = tb.Int32Col(pos=4) # integer + grid_j = tb.Int32Col(pos=5) # integer + pressure = tb.Float32Col(pos=6) # float (single-precision) + energy = tb.Float64Col(pos=7) # double (double-precision) + + +# Define a user record to characterize some kind of particles + + +class Big(tb.IsDescription): + name = tb.StringCol(itemsize=16) # 16-character String + # float1 = Float64Col(shape=32, dflt=np.arange(32)) + # float2 = Float64Col(shape=32, dflt=np.arange(32)) + float1 = tb.Float64Col(shape=32, dflt=range(32)) + float2 = tb.Float64Col(shape=32, dflt=[2.2] * 32) + ADCcount = tb.Int16Col() # signed short integer + grid_i = tb.Int32Col() # integer + grid_j = tb.Int32Col() # integer + pressure = tb.Float32Col() # float (single-precision) + energy = tb.Float64Col() # double (double-precision) + + +def create_file(filename, totalrows, recsize, verbose): + + # Open a 'n'ew file + dd = db.DB() + if recsize == "big": + isrec = tb.Description(Big) + elif recsize == "medium": + isrec = Medium() + else: + isrec = tb.Description(Small) + # dd.set_re_len(struct.calcsize(isrec._v_fmt)) # fixed length records + dd.open(filename, db.DB_RECNO, db.DB_CREATE | db.DB_TRUNCATE) + + rowswritten = 0 + # Get the record object associated with the new table + if recsize == "big": + isrec = Big() + arr = np.array(np.arange(32), type=np.float64) + arr2 = np.array(np.arange(32), type=np.float64) + elif recsize == "medium": + isrec = Medium() + arr = np.array(np.arange(2), type=np.float64) + else: + isrec = Small() + # print d + # Fill the table + if recsize == "big" or recsize == "medium": + d = { + "name": " ", + "float1": 1.0, + "float2": 2.0, + "ADCcount": 12, + "grid_i": 1, + "grid_j": 1, + "pressure": 1.9, + "energy": 1.8, + } + for i in range(totalrows): + # d['name'] = 'Particle: %6d' % (i) + # d['TDCcount'] = i % 256 + d["ADCcount"] = (i * 256) % (1 << 16) + if recsize == "big": + # d.float1 = np.array([i]*32, np.float64) + # d.float2 = np.array([i**2]*32, np.float64) + arr[0] = 1.1 + d["float1"] = arr + arr2[0] = 2.2 + d["float2"] = arr2 + pass + else: + d["float1"] = float(i) + d["float2"] = float(i) + d["grid_i"] = i + d["grid_j"] = 10 - i + d["pressure"] = float(i * i) + d["energy"] = d["pressure"] + dd.append(cPickle.dumps(d)) + # dd.append(struct.pack(isrec._v_fmt, + # d['name'], d['float1'], d['float2'], + # d['ADCcount'], + # d['grid_i'], d['grid_j'], + # d['pressure'], d['energy'])) + else: + d = {"var1": " ", "var2": 1, "var3": 12.1e10} + for i in range(totalrows): + d["var1"] = str(i) + d["var2"] = i + d["var3"] = 12.1e10 + dd.append(cPickle.dumps(d)) + # dd.append( + # struct.pack(isrec._v_fmt, d['var1'], d['var2'], d['var3'])) + + rowswritten += totalrows + + # Close the file + dd.close() + return (rowswritten, struct.calcsize(isrec._v_fmt)) + + +def read_file(filename, recsize, verbose): + # Open the HDF5 file in read-only mode + # fileh = shelve.open(filename, "r") + dd = db.DB() + if recsize == "big": + isrec = Big() + elif recsize == "medium": + isrec = Medium() + else: + isrec = Small() + # dd.set_re_len(struct.calcsize(isrec._v_fmt)) # fixed length records 
+ # dd.set_re_pad('-') # sets the pad character... + # dd.set_re_pad(45) # ...test both int and char + dd.open(filename, db.DB_RECNO) + if recsize == "big" or recsize == "medium": + print(isrec._v_fmt) + c = dd.cursor() + rec = c.first() + e = [] + while rec: + record = cPickle.loads(rec[1]) + # record = struct.unpack(isrec._v_fmt, rec[1]) + # if verbose: + # print record + if record["grid_i"] < 20: + e.append(record["grid_j"]) + # if record[4] < 20: + # e.append(record[5]) + rec = next(c) + else: + print(isrec._v_fmt) + # e = [ t[1] for t in fileh[table] if t[1] < 20 ] + c = dd.cursor() + rec = c.first() + e = [] + while rec: + record = cPickle.loads(rec[1]) + # record = struct.unpack(isrec._v_fmt, rec[1]) + # if verbose: + # print record + if record["var2"] < 20: + e.append(record["var1"]) + # if record[1] < 20: + # e.append(record[2]) + rec = next(c) + + print("resulting selection list ==>", e) + print("last record read ==>", record) + print("Total selected records ==> ", len(e)) + + # Close the file (eventually destroy the extended type) + dd.close() + + +# Add code to test here +if __name__ == "__main__": + import getopt + from time import perf_counter as clock + + usage = ( + """usage: %s [-v] [-s recsize] [-i iterations] file + -v verbose + -s use [big] record, [medium] or [small] + -i sets the number of rows in each table\n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "s:vi:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + recsize = "medium" + iterations = 100 + verbose = 0 + + # Get the options + for option in opts: + if option[0] == "-s": + recsize = option[1] + if recsize not in ["big", "medium", "small"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == "-i": + iterations = int(option[1]) + elif option[0] == "-v": + verbose = 1 + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + t1 = clock() + psyco.bind(create_file) + (rowsw, rowsz) = create_file(file, iterations, recsize, verbose) + t2 = clock() + tapprows = t2 - t1 + + t1 = clock() + psyco.bind(read_file) + read_file(file, recsize, verbose) + t2 = clock() + treadrows = t2 - t1 + + print(f"Rows written: {rowsw}, Row size: {rowsz}") + print(f"Time appending rows: {tapprows:.3f}") + if tapprows > 0.001: + print(f"Write rows/sec: {iterations / tapprows:.0f}") + print(f"Write KB/s: {rowsw * rowsz / (tapprows * 1024):.0f}") + print(f"Time reading rows: {treadrows:.3f}") + if treadrows > 0.001: + print(f"Read rows/sec: {iterations / treadrows:.0f}") + print(f"Read KB/s: {rowsw * rowsz / (treadrows * 1024):.0f}") diff --git a/bench/cacheout.py b/bench/cacheout.py new file mode 100644 index 0000000..54c12f5 --- /dev/null +++ b/bench/cacheout.py @@ -0,0 +1,13 @@ +# Program to clean out the filesystem cache +import numpy as np + +a = np.arange(1000 * 100 * 125, dtype="f8") # 100 MB of RAM +b = a * 3 # Another 100 MB +# delete the reference to the booked memory +del a +del b + +# Do a loop to fully recharge the python interpreter +j = 2 +for i in range(1000 * 1000): + j += i * 2 diff --git a/bench/chunkshape-bench.py b/bench/chunkshape-bench.py new file mode 100644 index 0000000..48b6b31 --- /dev/null +++ b/bench/chunkshape-bench.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python +# Benchmark the effect of chunkshapes in reading large datasets. +# You need at least PyTables 2.1 to run this! +# F. 
Alted + +from time import perf_counter as clock + +import numpy as np + +import tables as tb + +dim1, dim2 = 360, 6_109_666 +rows_to_read = range(0, 360, 36) + +print("=" * 32) +# Create the EArray +f = tb.open_file("/tmp/test.h5", "w") +a = f.create_earray( + f.root, "a", tb.Float64Atom(), shape=(dim1, 0), expectedrows=dim2 +) +print("Chunkshape for original array:", a.chunkshape) + +# Fill the EArray +t1 = clock() +zeros = np.zeros((dim1, 1), dtype="float64") +for i in range(dim2): + a.append(zeros) +tcre = clock() - t1 +thcre = dim1 * dim2 * 8 / (tcre * 1024 * 1024) +print(f"Time to append {a.nrows} rows: {tcre:.3f} sec ({thcre:.1f} MB/s)") + +# Read some row vectors from the original array +t1 = clock() +for i in rows_to_read: + r1 = a[i, :] +tr1 = clock() - t1 +thr1 = dim2 * len(rows_to_read) * 8 / (tr1 * 1024 * 1024) +print( + f"Time to read ten rows in original array: {tr1:.3f} sec ({thr1:.1f} MB/s)" +) + +print("=" * 32) +# Copy the array to another with a row-wise chunkshape +t1 = clock() +# newchunkshape = (1, a.chunkshape[0]*a.chunkshape[1]) +newchunkshape = (1, a.chunkshape[0] * a.chunkshape[1] * 10) # ten times larger +b = a.copy(f.root, "b", chunkshape=newchunkshape) +tcpy = clock() - t1 +thcpy = dim1 * dim2 * 8 / (tcpy * 1024 * 1024) +print("Chunkshape for row-wise chunkshape array:", b.chunkshape) +print(f"Time to copy the original array: {tcpy:.3f} sec ({thcpy:.1f} MB/s)") + +# Read the same ten rows from the new copied array +t1 = clock() +for i in rows_to_read: + r2 = b[i, :] +tr2 = clock() - t1 +thr2 = dim2 * len(rows_to_read) * 8 / (tr2 * 1024 * 1024) +print( + f"Time to read with a row-wise chunkshape: {tr2:.3f} sec ({thr2:.1f} MB/s)" +) +print("=" * 32) +print(f"Speed-up with a row-wise chunkshape: {tr1 / tr2:.1f}") + +f.close() diff --git a/bench/chunkshape-testing.py b/bench/chunkshape-testing.py new file mode 100644 index 0000000..9c31c9e --- /dev/null +++ b/bench/chunkshape-testing.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python + +"""Simple benchmark for testing chunkshapes and nrowsinbuf.""" + +from time import perf_counter as clock + +import numpy as np + +import tables as tb + +L = 20 +N = 2000 +M = 30 +complevel = 1 + +recarray = np.empty(shape=2, dtype="(2,2,2)i4,(2,3,3)f8,i4,i8") + +f = tb.open_file("chunkshape.h5", mode="w") + +# t = f.create_table(f.root, 'table', recarray, "mdim recarray") + +# a0 = f.create_array(f.root, 'field0', recarray['f0'], "mdim int32 array") +# a1 = f.create_array(f.root, 'field1', recarray['f1'], "mdim float64 array") + +# c0 = f.create_carray(f.root, 'cfield0', +# tables.Int32Atom(), (2,2,2), +# "mdim int32 carray") +# c1 = f.create_carray(f.root, 'cfield1', +# tables.Float64Atom(), (2,3,3), +# "mdim float64 carray") + +f1 = tb.open_file("chunkshape1.h5", mode="w") +c1 = f.create_carray( + f1.root, + "cfield1", + tb.Int32Atom(), + (L, N, M), + "scalar int32 carray", + tb.Filters(complevel=0), +) + +t1 = clock() +c1[:] = np.empty(shape=(L, 1, 1), dtype="int32") +print("carray1 populate time:", clock() - t1) +f1.close() + + +f2 = tb.open_file("chunkshape2.h5", mode="w") +c2 = f.create_carray( + f2.root, + "cfield2", + tb.Int32Atom(), + (L, M, N), + "scalar int32 carray", + tb.Filters(complevel), +) + +t1 = clock() +c2[:] = np.empty(shape=(L, 1, 1), dtype="int32") +print("carray2 populate time:", clock() - t1) +f2.close() + +f0 = tb.open_file("chunkshape0.h5", mode="w") +e0 = f.create_earray( + f0.root, + "efield0", + tb.Int32Atom(), + (0, L, M), + "scalar int32 carray", + tb.Filters(complevel), + expectedrows=N, +) + +t1 = clock() 
+e0.append(np.empty(shape=(N, L, M), dtype="int32"))
+print("earray0 populate time:", clock() - t1)
+f0.close()
+
+f1 = tb.open_file("chunkshape1.h5", mode="w")
+e1 = f.create_earray(
+    f1.root,
+    "efield1",
+    tb.Int32Atom(),
+    (L, 0, M),
+    "scalar int32 carray",
+    tb.Filters(complevel),
+    expectedrows=N,
+)
+
+t1 = clock()
+e1.append(np.empty(shape=(L, N, M), dtype="int32"))
+print("earray1 populate time:", clock() - t1)
+f1.close()
+
+
+f2 = tb.open_file("chunkshape2.h5", mode="w")
+e2 = f.create_earray(
+    f2.root,
+    "efield2",
+    tb.Int32Atom(),
+    (L, M, 0),
+    "scalar int32 carray",
+    tb.Filters(complevel),
+    expectedrows=N,
+)
+
+t1 = clock()
+e2.append(np.empty(shape=(L, M, N), dtype="int32"))
+print("earray2 populate time:", clock() - t1)
+f2.close()
+
+# t1=time()
+# c2[:] = np.empty(shape=(M, N), dtype="int32")
+# print "carray populate time:", time()-t1
+
+# f3 = f.create_carray(f.root, 'cfield3',
+#                      tables.Float64Atom(), (3,),
+#                      "scalar float64 carray", chunkshape=(32,))
+
+# e2 = f.create_earray(f.root, 'efield2',
+#                      tables.Int32Atom(), (0, M),
+#                      "scalar int32 carray", expectedrows=N)
+# t1=time()
+# e2.append(np.empty(shape=(N, M), dtype="int32"))
+# print "earray populate time:", time()-t1
+
+# t1=time()
+# c2._f_copy(newname='cfield2bis')
+# print "carray copy time:", time()-t1
+# t1=time()
+# e2._f_copy(newname='efield2bis')
+# print "earray copy time:", time()-t1
+
+f.close()
diff --git a/bench/collations.py b/bench/collations.py
new file mode 100644
index 0000000..2765bd9
--- /dev/null
+++ b/bench/collations.py
@@ -0,0 +1,133 @@
+from time import perf_counter as clock
+
+import numpy as np
+
+import tables as tb
+
+N = 1000 * 1000
+NCOLL = 200  # 200 collections maximum
+
+# In order to have reproducible results
+np.random.seed(19)
+
+
+class Energies(tb.IsDescription):
+    collection = tb.UInt8Col()
+    energy = tb.Float64Col()
+
+
+def fill_bucket(lbucket):
+    # c = np.random.normal(NCOLL/2, NCOLL/10, lbucket)
+    c = np.random.normal(NCOLL / 2, NCOLL / 100, lbucket)
+    e = np.arange(lbucket, dtype="f8")
+    return c, e
+
+
+# Fill the table
+t1 = clock()
+f = tb.open_file("data.nobackup/collations.h5", "w")
+table = f.create_table("/", "Energies", Energies, expectedrows=N)
+# Fill the table with values
+lbucket = 1000  # Fill in buckets of 1000 rows, for speed
+for i in range(0, N, lbucket):
+    bucket = fill_bucket(lbucket)
+    table.append(bucket)
+# Fill the remaining rows
+bucket = fill_bucket(N % lbucket)
+table.append(bucket)
+f.close()
+print(f"Time to create the table with {N} entries: {clock() - t1:.3f}")
+
+# Now, read the table and group it by collection
+f = tb.open_file("data.nobackup/collations.h5", "a")
+table = f.root.Energies
+
+#########################################################
+# First solution: load the table completely in memory
+#########################################################
+t1 = clock()
+t = table[:]  # convert to structured array
+coll1 = []
+collections = np.unique(t["collection"])
+for c in collections:
+    cond = t["collection"] == c
+    energy_this_collection = t["energy"][cond]
+    sener = energy_this_collection.sum()
+    coll1.append(sener)
+    print(c, " : ", sener)
+del collections, energy_this_collection
+print(f"Time for first solution: {clock() - t1:.3f}s")
+
+#########################################################
+# Second solution: load all the collections in memory
+#########################################################
+t1 = clock()
+collections = {}
+for row in table:
+    c = row["collection"]
+    e = row["energy"]
+    if c in collections:
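+        # grow this collection's list of energies row by row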
collections[c].append(e) + else: + collections[c] = [e] +# Convert the lists in numpy arrays +coll2 = [] +for c in sorted(collections): + energy_this_collection = np.array(collections[c]) + sener = energy_this_collection.sum() + coll2.append(sener) + print(c, " : ", sener) +del collections, energy_this_collection +print(f"Time for second solution: {clock() - t1:.3f}s") + +t1 = clock() +table.cols.collection.create_csindex() +# table.cols.collection.reindex() +print(f"Time for indexing: {clock() - t1:.3f}s") + +######################################################### +# Third solution: load each collection separately +######################################################### +t1 = clock() +coll3 = [] +for c in np.unique(table.col("collection")): + energy_this_collection = table.read_where( + "collection == c", field="energy" + ) + sener = energy_this_collection.sum() + coll3.append(sener) + print(c, " : ", sener) +del energy_this_collection +print(f"Time for third solution: {clock() - t1:.3f}s") + + +t1 = clock() +table2 = table.copy( + "/", + "EnergySortedByCollation", + overwrite=True, + sortby="collection", + propindexes=True, +) +print(f"Time for sorting: {clock() - t1:.3f}s") + +##################################################################### +# Fourth solution: load each collection separately. Sorted table. +##################################################################### +t1 = clock() +coll4 = [] +for c in np.unique(table2.col("collection")): + energy_this_collection = table2.read_where( + "collection == c", field="energy" + ) + sener = energy_this_collection.sum() + coll4.append(sener) + print(c, " : ", sener) + del energy_this_collection +print(f"Time for fourth solution: {clock() - t1:.3f}s") + + +# Finally, check that all solutions do match +assert coll1 == coll2 == coll3 == coll4 + +f.close() diff --git a/bench/copy-bench.py b/bench/copy-bench.py new file mode 100644 index 0000000..255d1bc --- /dev/null +++ b/bench/copy-bench.py @@ -0,0 +1,35 @@ +import sys +from time import perf_counter as clock + +import tables as tb + +if len(sys.argv) != 3: + print("usage: %s source_file dest_file", sys.argv[0]) +filesrc = sys.argv[1] +filedest = sys.argv[2] +filehsrc = tb.open_file(filesrc) +filehdest = tb.open_file(filedest, "w") +ntables = 0 +tsize = 0 +t1 = clock() +for group in filehsrc.walk_groups(): + if isinstance(group._v_parent, tb.File): + groupdest = filehdest.root + else: + pathname = group._v_parent._v_pathname + groupdest = filehdest.create_group( + pathname, group._v_name, title=group._v_title + ) + for table in filehsrc.list_nodes(group, classname="Table"): + print("copying table -->", table) + table.copy(groupdest, table.name) + ntables += 1 + tsize += table.nrows * table.rowsize +tsize_mb = tsize / (1024 * 1024) +ttime = clock() - t1 +print( + f"Copied {ntables} tables for a total of {tsize_mb:.1f} MB" + f" in {ttime:.3f} seconds ({tsize_mb / ttime:.1f} MB/s)" +) +filehsrc.close() +filehdest.close() diff --git a/bench/create-large-number-objects.py b/bench/create-large-number-objects.py new file mode 100644 index 0000000..100f25b --- /dev/null +++ b/bench/create-large-number-objects.py @@ -0,0 +1,45 @@ +"This creates an HDF5 file with a potentially large number of objects" + +import sys + +import numpy as np + +import tables as tb + +filename = sys.argv[1] + +# Open a new empty HDF5 file +fileh = tb.open_file(filename, mode="w") + +# nlevels -- Number of levels in hierarchy +# ngroups -- Number of groups on each level +# ndatasets -- Number of arrays on each 
group
+# LR: Low ratio groups/datasets
+# nlevels, ngroups, ndatasets = (3, 1, 1000)
+# MR: Medium ratio groups/datasets
+nlevels, ngroups, ndatasets = (3, 10, 100)
+# nlevels, ngroups, ndatasets = (3, 5, 10)
+# HR: High ratio groups/datasets
+# nlevels, ngroups, ndatasets = (30, 10, 10)
+
+# Create an Array to save on disk
+a = np.array([-1, 2, 4], np.int16)
+
+group = fileh.root
+group2 = fileh.root
+for k in range(nlevels):
+    for j in range(ngroups):
+        for i in range(ndatasets):
+            # Save the array on the HDF5 file
+            fileh.create_array(
+                group2, "array" + str(i), a, "Signed short array"
+            )
+        # Create a new group
+        group2 = fileh.create_group(group, "group" + str(j))
+    # Create a new group
+    group3 = fileh.create_group(group, "ngroup" + str(k))
+    # Iterate over this new group (group3)
+    group = group3
+    group2 = group3
+
+fileh.close()
diff --git a/bench/deep-tree-h5py.py b/bench/deep-tree-h5py.py
new file mode 100644
index 0000000..23323bb
--- /dev/null
+++ b/bench/deep-tree-h5py.py
@@ -0,0 +1,115 @@
+import random
+from time import perf_counter as clock
+from pathlib import Path
+
+import h5py
+import numpy as np
+
+random.seed(2)
+
+
+def show_stats(explain, tref):
+    "Show the used memory (only works for Linux 2.6.x)."
+    for line in Path("/proc/self/status").read_text().splitlines():
+        if line.startswith("VmSize:"):
+            vmsize = int(line.split()[1])
+        elif line.startswith("VmRSS:"):
+            vmrss = int(line.split()[1])
+        elif line.startswith("VmData:"):
+            vmdata = int(line.split()[1])
+        elif line.startswith("VmStk:"):
+            vmstk = int(line.split()[1])
+        elif line.startswith("VmExe:"):
+            vmexe = int(line.split()[1])
+        elif line.startswith("VmLib:"):
+            vmlib = int(line.split()[1])
+    print("Memory usage: ******* %s *******" % explain)
+    print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB")
+    print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB")
+    print(f"VmExe: {vmexe:>7} kB\tVmLib: {vmlib:>7} kB")
+    tnow = clock()
+    print(f"WallClock time: {tnow - tref:.3f}")
+    return tnow
+
+
+def populate(f, nlevels):
+    g = f
+    arr = np.zeros((10,), "f4")
+    for i in range(nlevels):
+        g["DS1"] = arr
+        g["DS2"] = arr
+        g.create_group("group2_")
+        g = g.create_group("group")
+
+
+def getnode(f, nlevels, niter, range_):
+    for i in range(niter):
+        # integer division: random.randrange() rejects float bounds
+        nlevel = random.randrange(
+            (nlevels - range_) // 2, (nlevels + range_) // 2
+        )
+        groupname = ""
+        for i in range(nlevel):
+            groupname += "/group"
+        groupname += "/DS1"
+        f[groupname]
+
+
+if __name__ == "__main__":
+    nlevels = 1024
+    niter = 1000
+    range_ = 256
+    profile = True
+    doprofile = True
+    verbose = False
+
+    if doprofile:
+        import pstats
+        import cProfile
+
+    if profile:
+        tref = clock()
+    if profile:
+        show_stats("Before creating...", tref)
+    f = h5py.File("/tmp/deep-tree.h5", "w")
+    if doprofile:
+        cProfile.run("populate(f, nlevels)", "populate.prof")
+        stats = pstats.Stats("populate.prof")
+        stats.strip_dirs()
+        stats.sort_stats("time", "calls")
+        if verbose:
+            stats.print_stats()
+        else:
+            stats.print_stats(20)
+    else:
+        populate(f, nlevels)
+    f.close()
+    if profile:
+        show_stats("After creating", tref)
+
+# if profile: tref = time()
+# if profile: show_stats("Before opening...", tref)
+# f = h5py.File("/tmp/deep-tree.h5", 'r')
+# if profile: show_stats("Before accessing...", tref)
+# if doprofile:
+#     prof.run('getnode(f, nlevels, niter, range_)', 'deep-tree.prof')
+#     stats = pstats.Stats('deep-tree.prof')
+#     stats.strip_dirs()
+#     stats.sort_stats('time', 'calls')
+#     if verbose:
+#         stats.print_stats()
+#     else:
+#         stats.print_stats(20)
# else:
+#     getnode(f, nlevels, niter, range_)
+# if profile: show_stats("After accessing", tref)
+# f.close()
+# if profile: show_stats("After closing", tref)
+
+# f = h5py.File("/tmp/deep-tree.h5", 'r')
+# g = f
+# for i in range(nlevels):
+#     dset = g["DS1"]
+#     dset = g["DS2"]
+#     group2 = g['group2_']
+#     g = g['group']
+# f.close()
diff --git a/bench/deep-tree.py b/bench/deep-tree.py
new file mode 100644
index 0000000..9f3d92d
--- /dev/null
+++ b/bench/deep-tree.py
@@ -0,0 +1,130 @@
+# Small benchmark to compare creation times with the parameter
+# PYTABLES_SYS_ATTRS active or not.
+
+import random
+from time import perf_counter as clock
+from pathlib import Path
+
+import tables as tb
+
+random.seed(2)
+
+
+def show_stats(explain, tref):
+    "Show the used memory (only works for Linux 2.6.x)."
+    for line in Path("/proc/self/status").read_text().splitlines():
+        if line.startswith("VmSize:"):
+            vmsize = int(line.split()[1])
+        elif line.startswith("VmRSS:"):
+            vmrss = int(line.split()[1])
+        elif line.startswith("VmData:"):
+            vmdata = int(line.split()[1])
+        elif line.startswith("VmStk:"):
+            vmstk = int(line.split()[1])
+        elif line.startswith("VmExe:"):
+            vmexe = int(line.split()[1])
+        elif line.startswith("VmLib:"):
+            vmlib = int(line.split()[1])
+    print("Memory usage: ******* %s *******" % explain)
+    print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB")
+    print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB")
+    print(f"VmExe: {vmexe:>7} kB\tVmLib: {vmlib:>7} kB")
+    tnow = clock()
+    print(f"WallClock time: {tnow - tref:.3f}")
+    return tnow
+
+
+def populate(f, nlevels):
+    g = f.root
+    # arr = np.zeros((10,), "f4")
+    # descr = {'f0': tables.Int32Col(), 'f1': tables.Float32Col()}
+    for i in range(nlevels):
+        # dset = f.create_array(g, "DS1", arr)
+        # dset = f.create_array(g, "DS2", arr)
+        f.create_carray(g, "DS1", tb.IntAtom(), (10,))
+        f.create_carray(g, "DS2", tb.IntAtom(), (10,))
+        # dset = f.create_table(g, "DS1", descr)
+        # dset = f.create_table(g, "DS2", descr)
+        f.create_group(g, "group2_")
+        g = f.create_group(g, "group")
+
+
+def getnode(f, nlevels, niter, range_):
+    for i in range(niter):
+        # integer division: random.randrange() rejects float bounds
+        nlevel = random.randrange(
+            (nlevels - range_) // 2, (nlevels + range_) // 2
+        )
+        groupname = ""
+        for i in range(nlevel):
+            groupname += "/group"
+        groupname += "/DS1"
+        f.get_node(groupname)
+
+
+if __name__ == "__main__":
+    nlevels = 1024
+    niter = 256
+    range_ = 128
+    node_cache_slots = 64
+    pytables_sys_attrs = True
+    profile = True
+    doprofile = True
+    verbose = False
+
+    if doprofile:
+        import pstats
+        import cProfile
+
+    if profile:
+        tref = clock()
+    if profile:
+        show_stats("Before creating...", tref)
+    f = tb.open_file(
+        "/tmp/PTdeep-tree.h5",
+        "w",
+        node_cache_slots=node_cache_slots,
+        pytables_sys_attrs=pytables_sys_attrs,
+    )
+    if doprofile:
+        cProfile.run("populate(f, nlevels)", "populate.prof")
+        stats = pstats.Stats("populate.prof")
+        stats.strip_dirs()
+        stats.sort_stats("time", "calls")
+        if verbose:
+            stats.print_stats()
+        else:
+            stats.print_stats(20)
+    else:
+        populate(f, nlevels)
+    f.close()
+    if profile:
+        show_stats("After creating", tref)
+
+    if profile:
+        tref = clock()
+    if profile:
+        show_stats("Before opening...", tref)
+    f = tb.open_file(
+        "/tmp/PTdeep-tree.h5",
+        "r",
+        node_cache_slots=node_cache_slots,
+        pytables_sys_attrs=pytables_sys_attrs,
+    )
+    if profile:
+        show_stats("Before accessing...", tref)
+    if doprofile:
+        cProfile.run("getnode(f, nlevels, niter, range_)", "getnode.prof")
+        stats = pstats.Stats("getnode.prof")
+        stats.strip_dirs()
+        stats.sort_stats("time", "calls")
+        if verbose:
+            stats.print_stats()
+        else:
+            stats.print_stats(20)
+    else:
+        getnode(f, nlevels, niter, range_)
+    if profile:
+        show_stats("After accessing", tref)
+    f.close()
+    if profile:
+        show_stats("After closing", tref)
diff --git a/bench/evaluate.py b/bench/evaluate.py
new file mode 100644
index 0000000..b2944e4
--- /dev/null
+++ b/bench/evaluate.py
@@ -0,0 +1,187 @@
+import sys
+from time import perf_counter as clock
+from pathlib import Path
+
+import numpy as np
+import numexpr as ne
+
+import tables as tb
+
+shape = (1000, 160_000)
+# shape = (10,1600)
+filters = tb.Filters(complevel=1, complib="blosc2", shuffle=1)
+ofilters = tb.Filters(complevel=1, complib="blosc2", shuffle=1)
+# filters = tb.Filters(complevel=1, complib="lzo", shuffle=0)
+# ofilters = tb.Filters(complevel=1, complib="lzo", shuffle=0)
+
+# TODO: Does it make sense to add a 's'tring typecode here?
+typecode_to_dtype = {
+    "b": "bool",
+    "i": "int32",
+    "l": "int64",
+    "f": "float32",
+    "d": "float64",
+    "c": "complex128",
+}
+
+
+def _compute(result, function, arguments, start=None, stop=None, step=None):
+    """Compute the `function` over the `arguments` and put the outcome in
+    `result`"""
+    arg0 = arguments[0]
+    if hasattr(arg0, "maindim"):
+        maindim = arg0.maindim
+        (start, stop, step) = arg0._process_range_read(start, stop, step)
+        nrowsinbuf = arg0.nrowsinbuf
+        print("nrowsinbuf-->", nrowsinbuf)
+    else:
+        maindim = 0
+        (start, stop, step) = (0, len(arg0), 1)
+        nrowsinbuf = len(arg0)
+    shape = list(arg0.shape)
+    shape[maindim] = len(range(start, stop, step))
+
+    # The slices parameter for arg0.__getitem__
+    slices = [slice(0, dim, 1) for dim in arg0.shape]
+
+    # This is a hack to prevent doing unnecessary conversions
+    # when copying buffers
+    if hasattr(arg0, "maindim"):
+        for arg in arguments:
+            arg._v_convert = False
+
+    # Start the computation itself
+    for start2 in range(start, stop, step * nrowsinbuf):
+        # Save the records on disk
+        stop2 = start2 + step * nrowsinbuf
+        if stop2 > stop:
+            stop2 = stop
+        # Set the proper slice in the main dimension
+        slices[maindim] = slice(start2, stop2, step)
+        # integer division: the result is used as a slice boundary
+        start3 = (start2 - start) // step
+        stop3 = start3 + nrowsinbuf
+        if stop3 > shape[maindim]:
+            stop3 = shape[maindim]
+        # Compute the slice to be filled in destination
+        sl = []
+        for i in range(maindim):
+            sl.append(slice(None, None, None))
+        sl.append(slice(start3, stop3, None))
+        # Get the values for computing the buffer
+        values = [arg.__getitem__(tuple(slices)) for arg in arguments]
+        result[tuple(sl)] = function(*values)
+
+    # Activate the conversion again (default)
+    if hasattr(arg0, "maindim"):
+        for arg in arguments:
+            arg._v_convert = True
+
+    return result
+
+
+def evaluate(ex, out=None, local_dict=None, global_dict=None, **kwargs):
+    """Evaluate expression and return an array."""
+
+    # First, get the signature for the arrays in expression
+    context = ne.necompiler.getContext(kwargs)
+    names, _ = ne.necompiler.getExprNames(ex, context)
+
+    # Get the arguments based on the names.
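+    # Mimic numexpr.evaluate(): look the names up in the caller's frame,
+    # locals first and then globals, so bare variable names in `ex`
+    # resolve to the arrays defined at the call site.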
+ call_frame = sys._getframe(1) + if local_dict is None: + local_dict = call_frame.f_locals + if global_dict is None: + global_dict = call_frame.f_globals + arguments = [] + types = [] + for name in names: + try: + a = local_dict[name] + except KeyError: + a = global_dict[name] + arguments.append(a) + if hasattr(a, "atom"): + types.append(a.atom) + else: + types.append(a) + + # Create a signature + signature = [ + (name, ne.necompiler.getType(type_)) + for (name, type_) in zip(names, types) + ] + print("signature-->", signature) + + # Compile the expression + compiled_ex = ne.necompiler.NumExpr(ex, signature, **kwargs) + print("fullsig-->", compiled_ex.fullsig) + + _compute(out, compiled_ex, arguments) + + return + + +if __name__ == "__main__": + iarrays = 0 + oarrays = 0 + doprofile = 1 + dokprofile = 0 + + f = tb.open_file("evaluate.h5", "w") + + # Create some arrays + if iarrays: + a = np.ones(shape, dtype="float32") + b = np.ones(shape, dtype="float32") * 2 + c = np.ones(shape, dtype="float32") * 3 + else: + a = f.create_carray( + f.root, "a", tb.Float32Atom(dflt=1), shape=shape, filters=filters + ) + a[:] = 1 + b = f.create_carray( + f.root, "b", tb.Float32Atom(dflt=2), shape=shape, filters=filters + ) + b[:] = 2 + c = f.create_carray( + f.root, "c", tb.Float32Atom(dflt=3), shape=shape, filters=filters + ) + c[:] = 3 + if oarrays: + out = np.empty(shape, dtype="float32") + else: + out = f.create_carray( + f.root, "out", tb.Float32Atom(), shape=shape, filters=ofilters + ) + + t0 = clock() + if iarrays and oarrays: + # out = ne.evaluate("a*b+c") + out = a * b + c + elif doprofile: + import pstats + import cProfile + + cProfile.run('evaluate("a*b+c", out)', "evaluate.prof") + stats = pstats.Stats("evaluate.prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + stats.print_stats(20) + elif dokprofile: + import cProfile + + import lsprofcalltree + + prof = cProfile.Profile() + prof.run('evaluate("a*b+c", out)') + kcg = lsprofcalltree.KCacheGrind(prof) + with Path("evaluate.kcg").open("w") as ofile: + kcg.output(ofile) + else: + evaluate("a*b+c", out) + print(f"Time for evaluate--> {clock() - t0:.3f}") + + # print "out-->", `out` + # print `out[:]` + + f.close() diff --git a/bench/expression.py b/bench/expression.py new file mode 100644 index 0000000..6604c7a --- /dev/null +++ b/bench/expression.py @@ -0,0 +1,184 @@ +from time import perf_counter as clock +from pathlib import Path + +import numpy as np + +import tables as tb + +OUT_DIR = Path("/scratch2/faltet/") # the directory for data output + +shape = (1000, 1000 * 1000) # shape for input arrays +expr = "a*b+1" # Expression to be computed + +nrows, ncols = shape + + +def tables(docompute, dowrite, complib, verbose): + + # Filenames + ifilename = OUT_DIR / "expression-inputs.h5" + ofilename = OUT_DIR / "expression-outputs.h5" + + # Filters + shuffle = True + if complib == "blosc": + filters = tb.Filters(complevel=1, complib="blosc", shuffle=shuffle) + elif complib == "lzo": + filters = tb.Filters(complevel=1, complib="lzo", shuffle=shuffle) + elif complib == "zlib": + filters = tb.Filters(complevel=1, complib="zlib", shuffle=shuffle) + else: + filters = tb.Filters(complevel=0, shuffle=False) + if verbose: + print("Will use filters:", filters) + + if dowrite: + f = tb.open_file(ifilename, "w") + + # Build input arrays + t0 = clock() + root = f.root + a = f.create_carray( + root, "a", tb.Float32Atom(), shape, filters=filters + ) + b = f.create_carray( + root, "b", tb.Float32Atom(), shape, filters=filters + ) + if verbose: + 
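+            # show the chunk geometry that PyTables chose for the inputs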
print("chunkshape:", a.chunkshape) + print("chunksize:", np.prod(a.chunkshape) * a.dtype.itemsize) + # row = np.linspace(0, 1, ncols) + row = np.arange(0, ncols, dtype="float32") + for i in range(nrows): + a[i] = row * (i + 1) + b[i] = row * (i + 1) * 2 + f.close() + print(f"[tables.Expr] Time for creating inputs: {clock() - t0:.3f}") + + if docompute: + f = tb.open_file(ifilename, "r") + fr = tb.open_file(ofilename, "w") + a = f.root.a + b = f.root.b + r1 = f.create_carray( + fr.root, "r1", tb.Float32Atom(), shape, filters=filters + ) + # The expression + e = tb.Expr(expr) + e.set_output(r1) + t0 = clock() + e.eval() + if verbose: + print("First ten values:", r1[0, :10]) + f.close() + fr.close() + print(f"[tables.Expr] Time for computing & save: {clock() - t0:.3f}") + + +def memmap(docompute, dowrite, verbose): + + afilename = OUT_DIR / "memmap-a.bin" + bfilename = OUT_DIR / "memmap-b.bin" + rfilename = OUT_DIR / "memmap-output.bin" + if dowrite: + t0 = clock() + a = np.memmap(afilename, dtype="float32", mode="w+", shape=shape) + b = np.memmap(bfilename, dtype="float32", mode="w+", shape=shape) + + # Fill arrays a and b + # row = np.linspace(0, 1, ncols) + row = np.arange(0, ncols, dtype="float32") + for i in range(nrows): + a[i] = row * (i + 1) + b[i] = row * (i + 1) * 2 + del a, b # flush data + print(f"[numpy.memmap] Time for creating inputs: {clock() - t0:.3f}") + + if docompute: + t0 = clock() + # Reopen inputs in read-only mode + a = np.memmap(afilename, dtype="float32", mode="r", shape=shape) + b = np.memmap(bfilename, dtype="float32", mode="r", shape=shape) + # Create the array output + r = np.memmap(rfilename, dtype="float32", mode="w+", shape=shape) + # Do the computation row by row + for i in range(nrows): + r[i] = eval(expr, {"a": a[i], "b": b[i]}) + if verbose: + print("First ten values:", r[0, :10]) + del a, b + del r # flush output data + print(f"[numpy.memmap] Time for compute & save: {clock() - t0:.3f}") + + +def do_bench(what, documpute, dowrite, complib, verbose): + if what == "tables": + tables(docompute, dowrite, complib, verbose) + if what == "memmap": + memmap(docompute, dowrite, verbose) + + +if __name__ == "__main__": + import sys + import getopt + + usage = ( + """usage: %s [-T] [-M] [-c] [-w] [-v] [-z complib] + -T use tables.Expr + -M use numpy.memmap + -c do the computation only + -w write inputs only + -v verbose mode + -z select compression library ('zlib' or 'lzo'). Default is None. 
+""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "TMcwvz:") + except Exception: + sys.stderr.write(usage) + sys.exit(1) + + # default options + usepytables = False + usememmap = False + docompute = False + dowrite = False + verbose = False + complib = None + + # Get the options + for option in opts: + if option[0] == "-T": + usepytables = True + elif option[0] == "-M": + usememmap = True + elif option[0] == "-c": + docompute = True + elif option[0] == "-w": + dowrite = True + elif option[0] == "-v": + verbose = True + elif option[0] == "-z": + complib = option[1] + if complib not in ("blosc", "lzo", "zlib"): + print( + "complib must be 'lzo' or 'zlib' " + "and you passed: '%s'" % complib + ) + sys.exit(1) + + # If not a backend selected, abort + if not usepytables and not usememmap: + print("Please select a backend:") + print("PyTables.Expr: -T") + print("NumPy.memmap: -M") + sys.exit(1) + + # Select backend and do the benchmark + if usepytables: + what = "tables" + if usememmap: + what = "memmap" + do_bench(what, docompute, dowrite, complib, verbose) diff --git a/bench/fetch_meteo_data.py b/bench/fetch_meteo_data.py new file mode 100644 index 0000000..ef720a3 --- /dev/null +++ b/bench/fetch_meteo_data.py @@ -0,0 +1,119 @@ +import sys +import datetime + +import s3fs +import numpy as np +import xarray as xr + +import tables as tb + +dim1 = 24 +dim2 = 721 +dim3 = 1440 +nrows = dim1 * dim2 * dim3 + + +class ERA5(tb.IsDescription): + lat = tb.Int32Col() # integer + lon = tb.Int32Col() # integer + time = tb.Int32Col() # integer + precip = tb.Float32Col() # float + solar = tb.Float32Col() # float + air = tb.Float32Col() # float + snow = tb.Float32Col() # float + wind = tb.Float32Col() # float + + +def open_zarr(year, month, datestart, dateend, path): + fs = s3fs.S3FileSystem(anon=True) + datestring = f"era5-pds/zarr/{year}/{month:02d}/data/" + s3map = s3fs.S3Map(datestring + path + ".zarr/", s3=fs) + dset = xr.open_dataset(s3map, engine="zarr") + if path[:3] == "air" or path[:3] == "sno" or path[:3] == "eas": + dset = dset.sel( + time0=slice(np.datetime64(datestart), np.datetime64(dateend)) + ) + else: + dset = dset.sel( + time1=slice(np.datetime64(datestart), np.datetime64(dateend)) + ) + return getattr(dset, path) + + +# Choose the datasets for table +datasets = [ + "precipitation_amount_1hour_Accumulation", + "integral_wrt_time_of_surface_direct_downwelling_shortwave_flux_" + "in_air_1hour_Accumulation", + "air_pressure_at_mean_sea_level", + "snow_density", + "eastward_wind_at_10_metres", +] + +# Create the table +f = tb.open_file("blosc_table.h5", "w") +table_blosc = f.create_table( + f.root, + "table_blosc", + ERA5, + "Blosc table", + tb.Filters( + complevel=5, + shuffle=False, + bitshuffle=True, + complib="blosc2:zstd", + # chunkshape=chunklen, + ), + expectedrows=nrows, +) + +# Create the structured NumPy buffer +dt = [(dim, np.int32) for dim in ("lat", "lon", "time")] +dt += [(urlpath[:5], np.float32) for urlpath in datasets] +dt = np.dtype(dt) +day_block = np.empty(nrows, dtype=dt) +day_block["time"] = np.repeat(np.arange(dim1), dim2 * dim3) +day_block["lat"] = np.tile(np.repeat(np.arange(dim2), dim3), dim1) +day_block["lon"] = np.tile(np.arange(dim3), dim1 * dim2) + +# Handle args +verbose = False +if "-v" in sys.argv: + verbose = True + sys.argv.remove("-v") +if len(sys.argv) != 3: + raise Exception("You must pass 2 arguments: start date and stop date") +try: + date_start = datetime.date.fromisoformat(sys.argv[1]) + date_stop = 
datetime.date.fromisoformat(sys.argv[2]) +except ValueError: + raise Exception("Dates must be in ISO format (e.g. YYYY-MM-DD)") +if date_stop < date_start: + raise Exception("Start date must be before stop date") + +# Fetch and append +date_i = date_start +a_day = datetime.timedelta(days=1) +while date_i < date_stop: + for dset_name in datasets: + if verbose: + print( + f"Fetching data with S3 from {dset_name} for " + f"date {date_i.isoformat()}" + ) + dset = open_zarr( + date_i.year, + date_i.month, + date_i.isoformat(), + (date_i).isoformat() + " 23:59", + dset_name, + ) + values = dset.values.flatten() + day_block[dset_name[:5]] = values[:] + + date_i = date_i + a_day + table_blosc.append(day_block) + +table_blosc.flush() + +f.close() diff --git a/bench/get-figures-ranges.py b/bench/get-figures-ranges.py new file mode 100644 index 0000000..e70bdb8 --- /dev/null +++ b/bench/get-figures-ranges.py @@ -0,0 +1,259 @@ +from pathlib import Path + +from matplotlib import pyplot as plt + +linewidth = 2 +# markers= ['+', ',', 'o', '.', 's', 'v', 'x', '>', '<', '^'] +# markers= [ 'x', '+', 'o', 's', 'v', '^', '>', '<', ] +markers = [ + "s", + "o", + "v", + "^", + "+", + "x", + ">", + "<", +] +markersize = 8 + + +def get_values(filename): + sizes = [] + values = [] + isize = None + txtime = 0 + for line in Path(filename).read_text().splitlines(): + if line.startswith("range"): + tmp = line.split(":")[1] + tmp = tmp.strip() + tmp = tmp[1:-1] + lower, upper = int(tmp.split(",")[0]), int(tmp.split(",")[1]) + isize = upper - lower + # print "isize-->", isize + if isize is None or isize == 0: + continue + if insert and line.startswith("Insert time"): + tmp = line.split(":")[1] + # itime = float(tmp[:tmp.index(',')]) + itime = float(tmp) + sizes.append(isize) + values.append(itime) + elif line.startswith("Index time"): + tmp = line.split(":")[1] + # xtime = float(tmp[:tmp.index(',')]) + xtime = float(tmp) + txtime += xtime + if create_index and create_index in line: + sizes.append(isize) + values.append(xtime) + elif create_total and txtime > xtime: + sizes.append(isize) + values.append(txtime) + elif table_size and line.startswith("Table size"): + tsize = float(line.split(":")[1]) + sizes.append(isize) + values.append(tsize) + elif indexes_size and line.startswith("Indexes size"): + xsize = float(line.split(":")[1]) + sizes.append(isize) + values.append(xsize) + elif total_size and line.startswith("Full size"): + fsize = float(line.split(":")[1]) + sizes.append(isize) + values.append(fsize) + elif (query or query_cold or query_warm) and line.startswith( + "[NOREP]" + ): + tmp = line.split(":")[1] + try: + qtime = float(tmp[: tmp.index("+-")]) + except ValueError: + qtime = float(tmp) + if colname in line: + if query and "1st" in line: + sizes.append(isize) + values.append(qtime) + elif query_cold and "cold" in line: + sizes.append(isize) + values.append(qtime) + elif query_warm and "warm" in line: + sizes.append(isize) + values.append(qtime) + return sizes, values + + +def show_plot(plots, yaxis, legends, gtitle): + plt.xlabel("Number of hits") + plt.ylabel(yaxis) + plt.title(gtitle) + # plt.ylim(0, 100) + plt.grid(True) + + # legends = [f[f.find('-'):f.index('.out')] for f in filenames] + # legends = [l.replace('-', ' ') for l in legends] + # plt.legend([p[0] for p in plots], legends, loc = "upper left") + plt.legend([p[0] for p in plots], legends, loc="best") + + # plt.subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2) + if outfile: + plt.savefig(outfile) + else: + plt.show() + + +if 
__name__ == "__main__": + + import sys + import getopt + + usage = ( + """usage: %s [-o file] [-t title] [--insert] [--create-index] [--create-total] [--table-size] [--indexes-size] [--total-size] [--query=colname] [--query-cold=colname] [--query-warm=colname] files + -o filename for output (only .png and .jpg extensions supported) + -t title of the plot + --insert -- Insert time for table + --create-index=colname -- Index time for column + --create-total -- Total time for creation of table + indexes + --table-size -- Size of table + --indexes-size -- Size of all indexes + --total-size -- Total size of table + indexes + --query=colname -- Time for querying the specified column + --query-cold=colname -- Time for querying the specified column (cold cache) + --query-warm=colname -- Time for querying the specified column (warm cache) + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt( + sys.argv[1:], + "o:t:", + [ + "insert", + "create-index=", + "create-total", + "table-size", + "indexes-size", + "total-size", + "query=", + "query-cold=", + "query-warm=", + ], + ) + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too few parameters, abort + if len(pargs) < 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + outfile = None + insert = 0 + create_index = None + create_total = 0 + table_size = 0 + indexes_size = 0 + total_size = 0 + query = 0 + query_cold = 0 + query_warm = 0 + colname = None + yaxis = "No axis name" + tit = None + gtitle = "Please set a title!" + + # Get the options + for option in opts: + if option[0] == "-o": + outfile = option[1] + elif option[0] == "-t": + tit = option[1] + elif option[0] == "--insert": + insert = 1 + yaxis = "Time (s)" + gtitle = "Insert time for table" + elif option[0] == "--create-index": + create_index = option[1] + yaxis = "Time (s)" + gtitle = "Create index time for column " + create_index + elif option[0] == "--create-total": + create_total = 1 + yaxis = "Time (s)" + gtitle = "Create time for table + indexes" + elif option[0] == "--table-size": + table_size = 1 + yaxis = "Size (MB)" + gtitle = "Table size" + elif option[0] == "--indexes-size": + indexes_size = 1 + yaxis = "Size (MB)" + gtitle = "Indexes size" + elif option[0] == "--total-size": + total_size = 1 + yaxis = "Size (MB)" + gtitle = "Total size (table + indexes)" + elif option[0] == "--query": + query = 1 + colname = option[1] + yaxis = "Time (s)" + gtitle = "Query time for " + colname + " column (first query)" + elif option[0] == "--query-cold": + query_cold = 1 + colname = option[1] + yaxis = "Time (s)" + gtitle = "Query time for " + colname + " column (cold cache)" + elif option[0] == "--query-warm": + query_warm = 1 + colname = option[1] + yaxis = "Time (s)" + gtitle = "Query time for " + colname + " column (warm cache)" + + filenames = pargs + + if tit: + gtitle = tit + + plots = [] + legends = [] + for filename in filenames: + plegend = filename[filename.find("-") : filename.index(".out")] + plegend = plegend.replace("-", " ") + xval, yval = get_values(filename) + print(f"Values for {filename} --> {xval}, {yval}") + if "PyTables" in filename or "pytables" in filename: + plot = plt.loglog(xval, yval, linewidth=2) + # plot = semilogx(xval, yval, linewidth=2) + plots.append(plot) + plt.setp( + plot, + marker=markers[0], + markersize=markersize, + linewidth=linewidth, + ) + else: + plots.append(plt.loglog(xval, yval, linewidth=3, color="m")) + # plots.append(semilogx(xval, yval, 
linewidth=3, color='m')) + # plots.append(semilogx(xval, yval, linewidth=5)) + legends.append(plegend) + if 0: # Per a introduir dades simulades si es vol... + xval = [ + 1000, + 10_000, + 100_000, + 1_000_000, + 10_000_000, + 100_000_000, + 1_000_000_000, + ] + # yval = [0.003, 0.005, 0.02, 0.06, 1.2, + # 40, 210] + yval = [0.0009, 0.0011, 0.0022, 0.005, 0.02, 0.2, 5.6] + plots.append(plt.loglog(xval, yval, linewidth=5)) + legends.append("PyTables Std") + show_plot(plots, yaxis, legends, gtitle) diff --git a/bench/get-figures.py b/bench/get-figures.py new file mode 100644 index 0000000..1def780 --- /dev/null +++ b/bench/get-figures.py @@ -0,0 +1,318 @@ +from pathlib import Path + +from matplotlib import pyplot as plt + +linewidth = 2 +# markers= ['+', ',', 'o', '.', 's', 'v', 'x', '>', '<', '^'] +# markers= [ 'x', '+', 'o', 's', 'v', '^', '>', '<', ] +markers = [ + "s", + "o", + "v", + "^", + "+", + "x", + ">", + "<", +] +markersize = 8 + + +def get_values(filename): + sizes = [] + values = [] + for line in Path(filename).read_text().splitlines(): + if line.startswith("Processing database:"): + txtime = 0 + line = line.split(":")[1] + # Check if entry is compressed and if has to be processed + line = line[: line.rfind(".")] + params = line.split("-") + for param in params: + if param[-1] in ("k", "m", "g"): + size = param + isize = int(size[:-1]) * 1000 + if size[-1] == "m": + isize *= 1000 + elif size[-1] == "g": + isize *= 1000 * 1000 + elif insert and line.startswith("Insert time"): + tmp = line.split(":")[1] + itime = float(tmp) + sizes.append(isize) + values.append(itime) + elif (overlaps or entropy) and line.startswith("overlaps"): + tmp = line.split(":")[1] + e1, e2 = tmp.split() + if isize in sizes: + sizes.pop() + values.pop() + sizes.append(isize) + if overlaps: + values.append(int(e1) + 1) + else: + values.append(float(e2) + 1) + elif (create_total or create_index) and line.startswith("Index time"): + tmp = line.split(":")[1] + xtime = float(tmp) + txtime += xtime + if create_index and create_index in line: + sizes.append(isize) + values.append(xtime) + elif create_total and txtime > xtime: + sizes.append(isize) + values.append(txtime) + elif table_size and line.startswith("Table size"): + tsize = float(line.split(":")[1]) + sizes.append(isize) + values.append(tsize) + elif indexes_size and line.startswith("Indexes size"): + xsize = float(line.split(":")[1]) + sizes.append(isize) + values.append(xsize) + elif total_size and line.startswith("Full size"): + fsize = float(line.split(":")[1]) + sizes.append(isize) + values.append(fsize) + elif query and line.startswith("Query time"): + tmp = line.split(":")[1] + qtime = float(tmp) + if colname in line: + sizes.append(isize) + values.append(qtime) + elif (query or query_cold or query_warm) and line.startswith( + "[NOREP]" + ): + tmp = line.split(":")[1] + try: + qtime = float(tmp[: tmp.index("+-")]) + except ValueError: + qtime = float(tmp) + if colname in line: + if query and "1st" in line: + sizes.append(isize) + values.append(qtime) + elif query_cold and "cold" in line: + sizes.append(isize) + values.append(qtime) + elif query_warm and "warm" in line: + sizes.append(isize) + values.append(qtime) + elif query_repeated and line.startswith("[REP]"): + if colname in line and "warm" in line: + tmp = line.split(":")[1] + qtime = float(tmp[: tmp.index("+-")]) + sizes.append(isize) + values.append(qtime) + return sizes, values + + +def show_plot(plots, yaxis, legends, gtitle): + plt.xlabel("Number of rows") + plt.ylabel(yaxis) + 
plt.title(gtitle) + # plt.xlim(10**3, 10**9) + plt.xlim(10**3, 10**10) + # plt.ylim(1.0e-5) + # plt.ylim(-1e4, 1e5) + # plt.ylim(-1e3, 1e4) + # plt.ylim(-1e2, 1e3) + plt.grid(True) + + # legends = [f[f.find('-'):f.index('.out')] for f in filenames] + # legends = [l.replace('-', ' ') for l in legends] + plt.legend([p[0] for p in plots], legends, loc="upper left") + # plt.legend([p[0] for p in plots], legends, loc = "center left") + + # subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2) + if outfile: + plt.savefig(outfile) + else: + plt.show() + + +if __name__ == "__main__": + + import sys + import getopt + + usage = ( + """usage: %s [-o file] [-t title] [--insert] [--create-index] [--create-total] [--overlaps] [--entropy] [--table-size] [--indexes-size] [--total-size] [--query=colname] [--query-cold=colname] [--query-warm=colname] [--query-repeated=colname] files + -o filename for output (only .png and .jpg extensions supported) + -t title of the plot + --insert -- Insert time for table + --create-index=colname -- Index time for column + --create-total -- Total time for creation of table + indexes + --overlaps -- The overlapping for the created index + --entropy -- The entropy for the created index + --table-size -- Size of table + --indexes-size -- Size of all indexes + --total-size -- Total size of table + indexes + --query=colname -- Time for querying the specified column + --query-cold=colname -- Time for querying the specified column (cold cache) + --query-warm=colname -- Time for querying the specified column (warm cache) + --query-repeated=colname -- Time for querying the specified column (rep query) + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt( + sys.argv[1:], + "o:t:", + [ + "insert", + "create-index=", + "create-total", + "overlaps", + "entropy", + "table-size", + "indexes-size", + "total-size", + "query=", + "query-cold=", + "query-warm=", + "query-repeated=", + ], + ) + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too few parameters, abort + if len(pargs) < 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + outfile = None + insert = 0 + create_index = None + create_total = 0 + overlaps = 0 + entropy = 0 + table_size = 0 + indexes_size = 0 + total_size = 0 + query = 0 + query_cold = 0 + query_warm = 0 + query_repeated = 0 + colname = None + yaxis = "No axis name" + tit = None + gtitle = "Please set a title!" 
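+ # Illustrative input (assumed log layout, inferred from get_values() above): + # each positional argument is a .out log from the index benchmarks (e.g. + # indexed_search.py), containing lines such as + # Processing database: data.nobackup/rnd-1m.h5 + # Insert time: 12.345 + # [NOREP] 1st query time for col4: 0.000123 + # where the token ending in k/m/g encodes the number of rows. + 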
+ + # Get the options + for option in opts: + if option[0] == "-o": + outfile = option[1] + elif option[0] == "-t": + tit = option[1] + elif option[0] == "--insert": + insert = 1 + yaxis = "Time (s)" + gtitle = "Insert time for table" + elif option[0] == "--create-index": + create_index = option[1] + yaxis = "Time (s)" + gtitle = "Create index time for " + create_index + " column" + elif option[0] == "--create-total": + create_total = 1 + yaxis = "Time (s)" + gtitle = "Create time for table + indexes" + elif option[0] == "--overlaps": + overlaps = 1 + yaxis = "Overlapping index + 1" + gtitle = "Overlapping for col4 column" + elif option[0] == "--entropy": + entropy = 1 + yaxis = "Entropy + 1" + gtitle = "Entropy for col4 column" + elif option[0] == "--table-size": + table_size = 1 + yaxis = "Size (MB)" + gtitle = "Table size" + elif option[0] == "--indexes-size": + indexes_size = 1 + yaxis = "Size (MB)" + # gtitle = "Indexes size" + gtitle = "Index size for col4 column" + elif option[0] == "--total-size": + total_size = 1 + yaxis = "Size (MB)" + gtitle = "Total size (table + indexes)" + elif option[0] == "--query": + query = 1 + colname = option[1] + yaxis = "Time (s)" + gtitle = "Query time for " + colname + " column (first query)" + elif option[0] == "--query-cold": + query_cold = 1 + colname = option[1] + yaxis = "Time (s)" + gtitle = "Query time for " + colname + " column (cold cache)" + elif option[0] == "--query-warm": + query_warm = 1 + colname = option[1] + yaxis = "Time (s)" + gtitle = "Query time for " + colname + " column (warm cache)" + elif option[0] == "--query-repeated": + query_repeated = 1 + colname = option[1] + yaxis = "Time (s)" + gtitle = "Query time for " + colname + " column (repeated query)" + + gtitle = gtitle.replace("col2", "Int32") + gtitle = gtitle.replace("col4", "Float64") + + filenames = pargs + + if tit: + gtitle = tit + + plots = [] + legends = [] + for i, filename in enumerate(filenames): + plegend = filename[: filename.index(".out")] + plegend = plegend.replace("-", " ") + # plegend = plegend.replace('zlib1', '') + if filename.find("PyTables") != -1: + xval, yval = get_values(filename) + print(f"Values for {filename} --> {xval}, {yval}") + if xval != []: + plot = plt.loglog(xval, yval) + # plot = semilogx(xval, yval) + plt.setp( + plot, + marker=markers[i], + markersize=markersize, + linewidth=linewidth, + ) + plots.append(plot) + legends.append(plegend) + else: + xval, yval = get_values(filename) + print(f"Values for {filename} --> {xval}, {yval}") + plots.append(plt.loglog(xval, yval, linewidth=3, color="m")) + # plots.append(semilogx(xval, yval, linewidth=linewidth, color='m')) + legends.append(plegend) + if 0: # To insert simulated data if desired... 
+ xval = [ + 1000, + 10_000, + 100_000, + 1_000_000, + 10_000_000, + 100_000_000, + 1_000_000_000, + ] + # yval = [0.003, 0.005, 0.02, 0.06, 1.2, + # 40, 210] + yval = [0.0009, 0.0011, 0.0022, 0.005, 0.02, 0.2, 5.6] + plots.append(plt.loglog(xval, yval, linewidth=linewidth)) + legends.append("PyTables Std") + show_plot(plots, yaxis, legends, gtitle) diff --git a/bench/indexed_search.py b/bench/indexed_search.py new file mode 100644 index 0000000..496356f --- /dev/null +++ b/bench/indexed_search.py @@ -0,0 +1,509 @@ +import random +import subprocess +from time import perf_counter as clock +from pathlib import Path + +import numpy as np + +# Constants + +STEP = 1000 * 100  # the size of the buffer to fill the table, in rows +SCALE = 0.1  # standard deviation of the noise compared with actual +# values +NI_NTIMES = 1  # The number of queries for doing a mean (non-idx cols) +# COLDCACHE = 10 # The number of reads where the cache is considered 'cold' +# WARMCACHE = 50 # The number of reads until the cache is considered 'warmed' +# READ_TIMES = WARMCACHE+50 # The number of complete calls to DB.query_db() +# COLDCACHE = 50 # The number of reads where the cache is considered 'cold' +# WARMCACHE = 50 # The number of reads until the cache is considered 'warmed' +# READ_TIMES = WARMCACHE+50 # The number of complete calls to DB.query_db() +MROW = 1000 * 1000 + +# Test values +COLDCACHE = 5  # The number of reads where the cache is considered 'cold' +WARMCACHE = 5  # The number of reads until the cache is considered 'warmed' +READ_TIMES = 10  # The number of complete calls to DB.query_db() + +# global variables +rdm_cod = ["lin", "rnd"] +prec = 6  # precision for printing floats + + +def get_nrows(nrows_str): + powers = {"k": 3, "m": 6, "g": 9} + try: + return int(float(nrows_str[:-1]) * 10 ** powers[nrows_str[-1]]) + except KeyError: + raise ValueError( + "value of nrows must end with either 'k', 'm' or 'g' suffixes." + ) + + +class DB: + + def __init__(self, nrows, rng, userandom): + global step, scale + self.step = STEP + self.scale = SCALE + self.rng = rng + self.userandom = userandom + self.filename = "-".join([rdm_cod[userandom], nrows]) + self.nrows = get_nrows(nrows) + + def get_db_size(self): + sout = subprocess.Popen( + "sync;du -s %s" % self.filename, shell=True, stdout=subprocess.PIPE + ).stdout + line = sout.readline() + return int(line.split()[0]) + + def print_mtime(self, t1, explain): + mtime = clock() - t1 + print(f"{explain}: {mtime:.6f}") + print(f"Krows/s: {self.nrows / 1000 / mtime:.6f}") + + def print_qtime(self, colname, ltimes): + qtime1 = ltimes[0]  # First measured time + qtime2 = ltimes[-1]  # Last measured time + print(f"Query time for {colname}: {qtime1:.6f}") + print(f"Mrows/s: {self.nrows / MROW / qtime1:.6f}") + print(f"Query time for {colname} (cached): {qtime2:.6f}") + print(f"Mrows/s (cached): {self.nrows / MROW / qtime2:.6f}") + + def norm_times(self, ltimes): + "Get the mean and stddev of ltimes, avoiding the extreme values." 
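+ # The trimming below drops timings more than one standard deviation + # above the mean (e.g. a measurement disturbed by OS jitter) before + # the statistics are recomputed. 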
+ lmean = ltimes.mean() + lstd = ltimes.std() + ntimes = ltimes[ltimes < lmean + lstd] + nmean = ntimes.mean() + nstd = ntimes.std() + return nmean, nstd + + def print_qtime_idx(self, colname, ltimes, repeated, verbose): + if repeated: + r = "[REP] " + else: + r = "[NOREP] " + ltimes = np.array(ltimes) + _ = len(ltimes) + qtime1 = ltimes[0]  # First measured time + ctimes = ltimes[1:COLDCACHE] + cmean, cstd = self.norm_times(ctimes) + wtimes = ltimes[WARMCACHE:] + wmean, wstd = self.norm_times(wtimes) + if verbose: + print("Times for cold cache:\n", ctimes) + # print "Times for warm cache:\n", wtimes + hist1, hist2 = np.histogram(wtimes) + print(f"Histogram for warm cache: {hist1}\n{hist2}") + print(f"{r}1st query time for {colname}: {qtime1:.{prec}f}") + print( + f"{r}Query time for {colname} (cold cache): " + f"{cmean:.{prec}f} +- {cstd:.{prec}f}" + ) + print( + f"{r}Query time for {colname} (warm cache): " + f"{wmean:.{prec}f} +- {wstd:.{prec}f}" + ) + + def print_db_sizes(self, init, filled, indexed): + table_size = (filled - init) / 1024 + indexes_size = (indexed - filled) / 1024 + print(f"Table size (MB): {table_size:.3f}") + print(f"Indexes size (MB): {indexes_size:.3f}") + print(f"Full size (MB): {table_size + indexes_size:.3f}") + + def fill_arrays(self, start, stop): + arr_f8 = np.arange(start, stop, dtype="float64") + arr_i4 = np.arange(start, stop, dtype="int32") + if self.userandom: + arr_f8 += np.random.normal(0, stop * self.scale, size=stop - start) + arr_i4 = np.array(arr_f8, dtype="int32") + return arr_i4, arr_f8 + + def create_db(self, dtype, kind, optlevel, verbose): + self.con = self.open_db(remove=1) + self.create_table(self.con) + init_size = self.get_db_size() + t1 = clock() + self.fill_table(self.con) + table_size = self.get_db_size() + self.print_mtime(t1, "Insert time") + self.index_db(dtype, kind, optlevel, verbose) + indexes_size = self.get_db_size() + self.print_db_sizes(init_size, table_size, indexes_size) + self.close_db(self.con) + + def index_db(self, dtype, kind, optlevel, verbose): + if dtype == "int": + idx_cols = ["col2"] + elif dtype == "float": + idx_cols = ["col4"] + else: + idx_cols = ["col2", "col4"] + for colname in idx_cols: + t1 = clock() + self.index_col(self.con, colname, kind, optlevel, verbose) + self.print_mtime(t1, "Index time (%s)" % colname) + + def query_db( + self, + niter, + dtype, + onlyidxquery, + onlynonidxquery, + avoidfscache, + verbose, + inkernel, + ): + self.con = self.open_db() + if dtype == "int": + reg_cols = ["col1"] + idx_cols = ["col2"] + elif dtype == "float": + reg_cols = ["col3"] + idx_cols = ["col4"] + else: + reg_cols = ["col1", "col3"] + idx_cols = ["col2", "col4"] + if avoidfscache: + rseed = int(np.random.randint(self.nrows)) + else: + rseed = 19 + # Query for non-indexed columns + np.random.seed(rseed) + base = np.random.randint(self.nrows) + if not onlyidxquery: + for colname in reg_cols: + ltimes = [] + random.seed(rseed) + for i in range(NI_NTIMES): + t1 = clock() + results = self.do_query(self.con, colname, base, inkernel) + ltimes.append(clock() - t1) + if verbose: + print("Results len:", results) + self.print_qtime(colname, ltimes) + # Always reopen the file after *every* query loop. + # Necessary to make the benchmark run correctly. 
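+ # Closing the file also drops PyTables' internal node and buffer + # caches, so every query loop starts from a comparable cache state. 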
+ self.close_db(self.con) + self.con = self.open_db() + # Query for indexed columns + if not onlynonidxquery: + for colname in idx_cols: + ltimes = [] + np.random.seed(rseed) + rndbase = np.random.randint(self.nrows, size=niter) + # First, non-repeated queries + for i in range(niter): + base = rndbase[i] + t1 = clock() + results = self.do_query(self.con, colname, base, inkernel) + # results, tprof = self.do_query( + # self.con, colname, base, inkernel) + ltimes.append(clock() - t1) + if verbose: + print("Results len:", results) + self.print_qtime_idx(colname, ltimes, False, verbose) + # Always reopen the file after *every* query loop. + # Necessary to make the benchmark run correctly. + self.close_db(self.con) + self.con = self.open_db() + ltimes = [] + # Second, repeated queries + # for i in range(niter): + # t1=time() + # results = self.do_query( + # self.con, colname, base, inkernel) + # results, tprof = self.do_query(self.con, colname, base, inkernel) + # ltimes.append(time()-t1) + # if verbose: + # print "Results len:", results + # self.print_qtime_idx(colname, ltimes, True, verbose) + # Print internal PyTables index tprof statistics + # tprof = np.array(tprof) + # tmean, tstd = self.norm_times(tprof) + # print "tprof-->", round(tmean, prec), "+-", round(tstd, prec) + # print "tprof hist-->", \ + # np.histogram(tprof) + # print "tprof raw-->", tprof + # Always reopen the file after *every* query loop. + # Necessary to make the benchmark run correctly. + self.close_db(self.con) + self.con = self.open_db() + # Finally, close the file. + self.close_db(self.con) + + def close_db(self, con): + con.close() + + +if __name__ == "__main__": + import sys + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = ( + """usage: %s [-T] [-P] [-v] [-f] [-k] [-p] [-m] [-c] [-q] [-i] [-I] [-S] [-x] [-z complevel] [-l complib] [-R range] [-N niter] [-n nrows] [-d datadir] [-O level] [-t kind] [-s] col -Q [suplim] + -T use PyTables + -P use Postgres + -v verbose + -f do a profile of the run (only query functionality) + -k do a profile for kcachegrind use (out file is 'indexed_search.kcg') + -p use "psyco" if available + -m use random values to fill the table + -q do a query (both indexed and non-indexed versions) + -i do a query (just indexed one) + -I do a query (just in-kernel one) + -S do a query (just standard one) + -x choose a different seed for random numbers (i.e. avoid FS cache) + -c create the database + -z compress with zlib (no compression by default) + -l use complib for compression (zlib used by default) + -R select a range in a field in the form "start,stop" (def "0,10") + -N number of iterations for reading + -n sets the number of rows (in krows) in each table + -d directory to save data (default: data.nobackup) + -O set the optimization level for PyTables indexes + -t select the index type: "medium" (default) or "full", "light", "ultralight" + -s select a type column for operations ('int' or 'float'. 
def all) + -Q do a repeated query up to 10**value + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt( + sys.argv[1:], "TPvfkpmcqiISxz:l:R:N:n:d:O:t:s:Q:" + ) + except Exception: + sys.stderr.write(usage) + sys.exit(1) + + # default options + usepytables = 0 + usepostgres = 0 + verbose = 0 + doprofile = 0 + dokprofile = 0 + usepsyco = 0 + userandom = 0 + docreate = 0 + optlevel = 0 + kind = "medium" + docompress = 0 + complib = "zlib" + doquery = False + onlyidxquery = False + onlynonidxquery = False + inkernel = True + avoidfscache = 0 + # rng = [-10, 10] + rng = [-1000, -1000] + repeatquery = 0 + repeatvalue = 0 + krows = "1k" + niter = READ_TIMES + dtype = "all" + datadir = "data.nobackup" + + # Get the options + for option in opts: + if option[0] == "-T": + usepytables = 1 + elif option[0] == "-P": + usepostgres = 1 + elif option[0] == "-v": + verbose = 1 + elif option[0] == "-f": + doprofile = 1 + elif option[0] == "-k": + dokprofile = 1 + elif option[0] == "-p": + usepsyco = 1 + elif option[0] == "-m": + userandom = 1 + elif option[0] == "-c": + docreate = 1 + elif option[0] == "-q": + doquery = True + elif option[0] == "-i": + doquery = True + onlyidxquery = True + elif option[0] == "-I": + doquery = True + onlynonidxquery = True + elif option[0] == "-S": + doquery = True + onlynonidxquery = True + inkernel = False + elif option[0] == "-x": + avoidfscache = 1 + elif option[0] == "-z": + docompress = int(option[1]) + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-R": + rng = [int(i) for i in option[1].split(",")] + elif option[0] == "-N": + niter = int(option[1]) + elif option[0] == "-n": + krows = option[1] + elif option[0] == "-d": + datadir = option[1] + elif option[0] == "-O": + optlevel = int(option[1]) + elif option[0] == "-t": + if option[1] in ("full", "medium", "light", "ultralight"): + kind = option[1] + else: + print( + "kind should be either 'full', 'medium', 'light' or " + "'ultralight'" + ) + sys.exit(1) + elif option[0] == "-s": + if option[1] in ("int", "float"): + dtype = option[1] + else: + print("column should be either 'int' or 'float'") + sys.exit(1) + elif option[0] == "-Q": + repeatquery = 1 + repeatvalue = int(option[1]) + + # If no database backend is selected, abort + if not usepytables and not usepostgres: + print("Please select a backend:") + print("PyTables: -T") + print("Postgres: -P") + sys.exit(1) + + # Create the class for the database + if usepytables: + from pytables_backend import PyTablesDB + + db = PyTablesDB( + krows, rng, userandom, datadir, docompress, complib, kind, optlevel + ) + elif usepostgres: + from postgres_backend import PostgresDB + + db = PostgresDB(krows, rng, userandom) + + if not avoidfscache: + # in order to always generate the same random sequence + np.random.seed(20) + + if verbose: + if userandom: + print("using random values") + if onlyidxquery: + print("doing indexed queries only") + + if psyco_imported and usepsyco: + psyco.bind(db.create_db) + psyco.bind(db.query_db) + + if docreate: + if verbose: + print("writing %s rows" % krows) + db.create_db(dtype, kind, optlevel, verbose) + + if doquery: + print("Calling query_db() %s times" % niter) + if doprofile: + import pstats + import cProfile + + cProfile.run( + "db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, " + "avoidfscache, verbose, inkernel)", + "indexed_search.prof", + ) + stats = pstats.Stats("indexed_search.prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + if verbose: + stats.print_stats() + else: + 
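+ # Without -v, print only the 20 most time-consuming functions. 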
stats.print_stats(20) + elif dokprofile: + import cProfile + + import lsprofcalltree + + prof = cProfile.Profile() + prof.run( + "db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, " + "avoidfscache, verbose, inkernel)" + ) + kcg = lsprofcalltree.KCacheGrind(prof) + with Path("indexed_search.kcg").open("w") as ofile: + kcg.output(ofile) + elif doprofile: + import hotshot + import hotshot.stats + + prof = hotshot.Profile("indexed_search.prof") + benchtime, stones = prof.run( + "db.query_db(niter, dtype, onlyidxquery, onlynonidxquery, " + "avoidfscache, verbose, inkernel)" + ) + prof.close() + stats = hotshot.stats.load("indexed_search.prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + stats.print_stats(20) + else: + db.query_db( + niter, + dtype, + onlyidxquery, + onlynonidxquery, + avoidfscache, + verbose, + inkernel, + ) + + if repeatquery: + # Start by a range which is almost None + db.rng = [1, 1] + if verbose: + print("range:", db.rng) + db.query_db( + niter, + dtype, + onlyidxquery, + onlynonidxquery, + avoidfscache, + verbose, + inkernel, + ) + for i in range(repeatvalue): + for j in (1, 2, 5): + rng = j * 10**i + db.rng = [-rng / 2, rng / 2] + if verbose: + print("range:", db.rng) + # if usepostgres: + # os.system( + # "echo 1 > /proc/sys/vm/drop_caches;" + # " /etc/init.d/postgresql restart") + # else: + # os.system("echo 1 > /proc/sys/vm/drop_caches") + db.query_db( + niter, + dtype, + onlyidxquery, + onlynonidxquery, + avoidfscache, + verbose, + inkernel, + ) diff --git a/bench/keysort.py b/bench/keysort.py new file mode 100644 index 0000000..9f8680b --- /dev/null +++ b/bench/keysort.py @@ -0,0 +1,45 @@ +from time import perf_counter as clock + +import numpy as np + +import tables as tb + +N = 1000 * 1000 +rnd = np.random.randint(N, size=N) + +for dtype1 in ( + "S6", + "b1", + "i1", + "i2", + "i4", + "i8", + "u1", + "u2", + "u4", + "u8", + "f4", + "f8", +): + for dtype2 in ("u4", "i8"): + print("dtype array1, array2-->", dtype1, dtype2) + a = np.array(rnd, dtype1) + b = np.arange(N, dtype=dtype2) + c = a.copy() + + t1 = clock() + d = c.argsort() + # c.sort() + # e=c + e = c[d] + f = b[d] + tref = clock() - t1 + print(f"normal sort time--> {tref}") + + t1 = clock() + tb.indexesextension.keysort(a, b) + tks = clock() - t1 + print(f"keysort time--> {tks} {tref / tks:.2f}x") + assert np.all(a == e) + # assert np.all(b == d) + assert np.all(f == d) diff --git a/bench/lookup_bench.py b/bench/lookup_bench.py new file mode 100644 index 0000000..99d3170 --- /dev/null +++ b/bench/lookup_bench.py @@ -0,0 +1,257 @@ +"""Benchmark to help choose the best chunksize so as to optimize the access +time in random lookups.""" + +import subprocess +from time import perf_counter as clock +from pathlib import Path + +import numpy as np + +import tables as tb + +# Constants +NOISE = 1e-15  # standard deviation of the noise compared with actual values + +rdm_cod = ["lin", "rnd"] + + +def get_nrows(nrows_str): + powers = {"k": 3, "m": 6, "g": 9} + try: + return int(float(nrows_str[:-1]) * 10 ** powers[nrows_str[-1]]) + except KeyError: + raise ValueError( + "value of nrows must end with either 'k', 'm' or 'g' suffixes." 
+ ) + + +class DB: + + def __init__( + self, + nrows, + dtype, + chunksize, + userandom, + datadir, + docompress=0, + complib="zlib", + ): + self.dtype = dtype + self.docompress = docompress + self.complib = complib + self.filename = "-".join( + [rdm_cod[userandom], "n" + nrows, "s" + chunksize, dtype] + ) + # Complete the filename + self.filename = "lookup-" + self.filename + if docompress: + self.filename += "-" + complib + str(docompress) + self.filename = datadir + "/" + self.filename + ".h5" + print("Processing database:", self.filename) + self.userandom = userandom + self.nrows = get_nrows(nrows) + self.chunksize = get_nrows(chunksize) + self.step = self.chunksize + self.scale = NOISE + + def get_db_size(self): + sout = subprocess.Popen( + "sync;du -s %s" % self.filename, shell=True, stdout=subprocess.PIPE + ).stdout + line = sout.readline() + return int(line.split()[0]) + + def print_mtime(self, t1, explain): + mtime = clock() - t1 + print(f"{explain}: {mtime:.6f}") + print(f"Krows/s: {self.nrows / 1000 / mtime:.6f}") + + def print_db_sizes(self, init, filled): + array_size = (filled - init) / 1024 + print(f"Array size (MB): {array_size:.3f}") + + def open_db(self, remove=0): + if remove and Path(self.filename).is_file(): + Path(self.filename).unlink() + con = tb.open_file(self.filename, "a") + return con + + def create_db(self, verbose): + self.con = self.open_db(remove=1) + self.create_array() + init_size = self.get_db_size() + t1 = clock() + self.fill_array() + array_size = self.get_db_size() + self.print_mtime(t1, "Insert time") + self.print_db_sizes(init_size, array_size) + self.close_db() + + def create_array(self): + # The filters chosen + filters = tb.Filters(complevel=self.docompress, complib=self.complib) + atom = tb.Atom.from_kind(self.dtype) + self.con.create_earray( + self.con.root, + "earray", + atom, + (0,), + filters=filters, + expectedrows=self.nrows, + chunkshape=(self.chunksize,), + ) + + def fill_array(self): + "Fills the array" + earray = self.con.root.earray + j = 0 + arr = self.get_array(0, self.step) + for i in range(0, self.nrows, self.step): + stop = (j + 1) * self.step + if stop > self.nrows: + stop = self.nrows + # ##arr = self.get_array(i, stop, dtype) + earray.append(arr) + j += 1 + earray.flush() + + def get_array(self, start, stop): + arr = np.arange(start, stop, dtype="float") + if self.userandom: + arr += np.random.normal(0, stop * self.scale, size=stop - start) + arr = arr.astype(self.dtype) + return arr + + def print_qtime(self, ltimes): + ltimes = np.array(ltimes) + print("Raw query times:\n", ltimes) + print("Histogram times:\n", np.histogram(ltimes[1:])) + ntimes = len(ltimes) + qtime1 = ltimes[0]  # First measured time + if ntimes > 5: + # Wait until the 5th iteration (in order to + # ensure that the index is effectively cached) to take times + qtime2 = sum(ltimes[5:]) / (ntimes - 5) + else: + qtime2 = ltimes[-1]  # Last measured time + print(f"1st query time: {qtime1:.3f}") + print(f"Mean (skipping the first 5 meas.): {qtime2:.3f}") + + def query_db(self, niter, avoidfscache, verbose): + self.con = self.open_db() + earray = self.con.root.earray + if avoidfscache: + rseed = int(np.random.randint(self.nrows)) + else: + rseed = 19 + np.random.seed(rseed) + np.random.randint(self.nrows) + ltimes = [] + for i in range(niter): + t1 = clock() + self.do_query(earray, np.random.randint(self.nrows)) + ltimes.append(clock() - t1) + self.print_qtime(ltimes) + self.close_db() + + def do_query(self, earray, idx): + return earray[idx] + + def close_db(self): + 
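+ # Close the HDF5 file opened by open_db(). 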
self.con.close() + + +if __name__ == "__main__": + import sys + import getopt + + usage = ( + """usage: %s [-v] [-m] [-c] [-q] [-x] [-z complevel] [-l complib] [-N niter] [-n nrows] [-d datadir] [-t] type [-s] chunksize + -v verbose + -m use random values to fill the array + -q do a (random) lookup + -x choose a different seed for random numbers (i.e. avoid FS cache) + -c create the file + -z compress with zlib (no compression by default) + -l use complib for compression (zlib used by default) + -N number of iterations for reading + -n sets the number of rows in the array + -d directory to save data (default: data.nobackup) + -t select the type for array ('int' or 'float'. def 'float') + -s select the chunksize for array + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vmcqxz:l:N:n:d:t:s:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + userandom = 0 + docreate = 0 + optlevel = 0 + docompress = 0 + complib = "zlib" + doquery = False + avoidfscache = 0 + krows = "1k" + chunksize = "32k" + niter = 50 + datadir = "data.nobackup" + dtype = "float" + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + elif option[0] == "-m": + userandom = 1 + elif option[0] == "-c": + docreate = 1 + createindex = 1 + elif option[0] == "-q": + doquery = True + elif option[0] == "-x": + avoidfscache = 1 + elif option[0] == "-z": + docompress = int(option[1]) + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-N": + niter = int(option[1]) + elif option[0] == "-n": + krows = option[1] + elif option[0] == "-d": + datadir = option[1] + elif option[0] == "-t": + if option[1] in ("int", "float"): + dtype = option[1] + else: + print("type should be either 'int' or 'float'") + sys.exit(0) + elif option[0] == "-s": + chunksize = option[1] + + if not avoidfscache: + # in order to always generate the same random sequence + np.random.seed(20) + + if verbose: + if userandom: + print("using random values") + + db = DB(krows, dtype, chunksize, userandom, datadir, docompress, complib) + + if docreate: + if verbose: + print("writing %s rows" % krows) + db.create_db(verbose) + + if doquery: + print("Calling query_db() %s times" % niter) + db.query_db(niter, avoidfscache, verbose) diff --git a/bench/open_close-bench.py b/bench/open_close-bench.py new file mode 100644 index 0000000..1bb23db --- /dev/null +++ b/bench/open_close-bench.py @@ -0,0 +1,236 @@ +"""Testbed for open/close PyTables files. + +This uses the cProfile profiler. 
+ +""" + +import os +import sys +import getopt +import pstats +import cProfile +from time import perf_counter as clock +from pathlib import Path + +import tables as tb + +filename = None +niter = 1 + + +def show_stats(explain, tref): + "Show the used memory" + for line in Path("/proc/self/status").read_text().splitlines(): + if line.startswith("VmSize:"): + vmsize = int(line.split()[1]) + elif line.startswith("VmRSS:"): + vmrss = int(line.split()[1]) + elif line.startswith("VmData:"): + vmdata = int(line.split()[1]) + elif line.startswith("VmStk:"): + vmstk = int(line.split()[1]) + elif line.startswith("VmExe:"): + vmexe = int(line.split()[1]) + elif line.startswith("VmLib:"): + vmlib = int(line.split()[1]) + print("WallClock time:", clock() - tref) + print("Memory usage: ******* %s *******" % explain) + print(f"VmSize: {vmsize:>7} kB\tVmRSS: {vmrss:>7} kB") + print(f"VmData: {vmdata:>7} kB\tVmStk: {vmstk:>7} kB") + print(f"VmExe: {vmexe:>7} kB\tVmLib: {vmlib:>7} kB") + + +def check_open_close(): + for i in range(niter): + print( + "------------------ open_close #%s -------------------------" % i + ) + tref = clock() + fileh = tb.open_file(filename) + fileh.close() + show_stats("After closing file", tref) + + +def check_only_open(): + for i in range(niter): + print("------------------ only_open #%s -------------------------" % i) + tref = clock() + fileh = tb.open_file(filename) + show_stats("Before closing file", tref) + fileh.close() + + +def check_full_browse(): + for i in range(niter): + print("------------------ full_browse #%s -----------------------" % i) + tref = clock() + fileh = tb.open_file(filename) + for node in fileh: + pass + fileh.close() + show_stats("After full browse", tref) + + +def check_partial_browse(): + for i in range(niter): + print("------------------ partial_browse #%s --------------------" % i) + tref = clock() + fileh = tb.open_file(filename) + for node in fileh.root.ngroup0.ngroup1: + pass + fileh.close() + show_stats("After closing file", tref) + + +def check_full_browse_attrs(): + for i in range(niter): + print("------------------ full_browse_attrs #%s -----------------" % i) + tref = clock() + fileh = tb.open_file(filename) + for node in fileh: + # Access to an attribute + _ = node._v_attrs.CLASS + fileh.close() + show_stats("After full browse", tref) + + +def check_partial_browse_attrs(): + for i in range(niter): + print("------------------ partial_browse_attrs #%s --------------" % i) + tref = clock() + fileh = tb.open_file(filename) + for node in fileh.root.ngroup0.ngroup1: + # Access to an attribute + _ = node._v_attrs.CLASS + fileh.close() + show_stats("After closing file", tref) + + +def check_open_group(): + for i in range(niter): + print("------------------ open_group #%s ------------------------" % i) + tref = clock() + fileh = tb.open_file(filename) + group = fileh.root.ngroup0.ngroup1 + # Access to an attribute + _ = group._v_attrs.CLASS + fileh.close() + show_stats("After closing file", tref) + + +def check_open_leaf(): + for i in range(niter): + print("------------------ open_leaf #%s -----------------------" % i) + tref = clock() + fileh = tb.open_file(filename) + leaf = fileh.root.ngroup0.ngroup1.array9 + # Access to an attribute + _ = leaf._v_attrs.CLASS + fileh.close() + show_stats("After closing file", tref) + + +if __name__ == "__main__": + + usage = ( + """usage: %s [-v] [-p] [-n niter] [-O] [-o] [-B] [-b] [-g] [-l] [-A] [-a] [-E] [-S] datafile + -v verbose (total dump of profiling) + -p do profiling + -n number of iterations for 
reading + -O Check open_close + -o Check only_open + -B Check full browse + -b Check partial browse + -A Check full browse and reading one attr from each node + -a Check partial browse and reading one attr from each node + -g Check open nested group + -l Check open nested leaf + -E Check everything + -S Check everything as subprocess + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpn:OoBbAaglESs") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too many parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + silent = 0  # if silent, does not print the final statistics + profile = 0 + all_checks = 0 + all_system_checks = 0 + func = [] + + # Checking options + options = ["-O", "-o", "-B", "-b", "-A", "-a", "-g", "-l"] + + # Dict to map options to checking functions + option2func = { + "-O": "check_open_close", + "-o": "check_only_open", + "-B": "check_full_browse", + "-b": "check_partial_browse", + "-A": "check_full_browse_attrs", + "-a": "check_partial_browse_attrs", + "-g": "check_open_group", + "-l": "check_open_leaf", + } + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + elif option[0] == "-p": + profile = 1 + elif option[0] in option2func: + func.append(option2func[option[0]]) + elif option[0] == "-E": + all_checks = 1 + for opt in options: + func.append(option2func[opt]) + elif option[0] == "-S": + all_system_checks = 1 + elif option[0] == "-s": + silent = 1 + elif option[0] == "-n": + niter = int(option[1]) + + filename = pargs[0] + + tref = clock() + if all_system_checks: + args.remove("-S")  # We don't want -S in the options list again + for opt in options: + opts = r"{} \-s {} {}".format(progname, opt, " ".join(args)) + # print "opts-->", opts + os.system("%s %s" % (sys.executable, opts)) + else: + if profile: + for ifunc in func: + cProfile.run(ifunc + "()", ifunc + ".prof") + stats = pstats.Stats(ifunc + ".prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + if verbose: + stats.print_stats() + else: + stats.print_stats(20) + else: + for ifunc in func: + eval(ifunc + "()") + + if not silent: + print("------------------ End of run -------------------------") + show_stats("Final statistics (after closing everything)", tref) diff --git a/bench/opteron-stress-test.txt b/bench/opteron-stress-test.txt new file mode 100644 index 0000000..33dc599 --- /dev/null +++ b/bench/opteron-stress-test.txt @@ -0,0 +1,63 @@ +Stress test on a 64-bit AMD Opteron platform +============================================= +2004-02-04. F. 
Alted + +Platform description: + +4 processors AMD Opteron (64-bit) @ 1.6 GHz and 1 MB cache +8 GB RAM +HD IBM DeskStar 120GXP 80 GB ATA/100 2 MB cache @ 7200 rpm +SuSe Linux Enterprise Server (SLES) +Linux kernel 2.4.21-178-smp +ReiserFS filesystem + +Here's the command to do the stress test: + +time python /tmp/stress-test3.py -l zlib -c 6 -g400 -t 300 -i 20000 /tmp/test-big-zlib-6.h5 +ls -lh /tmp/test-big-zlib-6.h5 + +The output: + +Compression level: 6 +Compression library: zlib +Rows written: 2400000000 Row size: 512 +Time writing rows: 56173.557 s (real) 56154.84 s (cpu) 100% +Write rows/sec: 42724 +Write KB/s : 21362 +Rows read: 2400000000 Row size: 512 Buf size: 39936 +Time reading rows: 29339.936 s (real) 29087.88 s (cpu) 99% +Read rows/sec: 81799 +Read KB/s : 40899 + +real 1425m43.846s +user 1308m34.340s +sys 112m17.100s +-rw-r--r-- 1 falted users 2.7G 2004-02-04 02:25 /tmp/test-big-zlib-6 +.h5 + +The maximum amount of RAM taken by the test should be less than 300 MB (241 +MB when the test was running for 5750 minutes, which is the last time I +checked). + + +Another test with the same machine: + +time python /tmp/stress-test3.py -l zlib -c 6 -g400 -t 300 -i 100000 /tmp/test-big-zlib-6-2.h5 +ls -lh /tmp/test-big-zlib-6-2.h5 + +Compression level: 6 +Compression library: zlib +Rows written: 12000000000 Row size: 512 +Time writing rows: 262930.901 s (real) 262619.72 s (cpu) 100% +Write rows/sec: 45639 +Write KB/s : 22819 +Rows read: 12000000000 Row size: 512 Buf size: 49664 +Time reading rows: 143171.761 s (real) 141560.42 s (cpu) 99% +Read rows/sec: 83815 +Read KB/s : 41907 + +real 6768m34.076s +user 6183m38.690s +sys 552m51.150s +-rw-r--r-- 1 5350 users 11G 2004-02-09 00:57 /tmp/test-big-zlib-6 +-2.h5 diff --git a/bench/optimal-chunksize.py b/bench/optimal-chunksize.py new file mode 100644 index 0000000..17c33ed --- /dev/null +++ b/bench/optimal-chunksize.py @@ -0,0 +1,134 @@ +"""Small benchmark on the effect of chunksizes and compression on HDF5 files. + +Francesc Alted +2007-11-25 + +""" + +import math +import tempfile +import subprocess +from time import perf_counter as clock +from pathlib import Path + +import numpy as np + +import tables as tb + +# Size of dataset +# N, M = 512, 2**16 # 256 MB +# N, M = 512, 2**18 # 1 GB +# N, M = 512, 2**19 # 2 GB +N, M = 2000, 1_000_000  # 15 GB +# N, M = 4000, 1000000 # 30 GB +datom = tb.Float64Atom()  # elements are double precision + + +def quantize(data, least_significant_digit): + """Quantize data to improve compression. + + data is quantized using around(scale*data)/scale, where scale is + 2**bits, and bits is determined from the least_significant_digit. + For example, if least_significant_digit=1, bits will be 4. 
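+ + (Worked example for clarity: with least_significant_digit=1 the code + computes precision = 0.1, exp = -1, bits = ceil(log2(10)) = 4 and + scale = 16, so a value like 3.14159 is stored as + around(16 * 3.14159) / 16 = 50 / 16 = 3.125.) 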
+ + """ + + precision = 10**-least_significant_digit + exp = math.log(precision, 10) + if exp < 0: + exp = math.floor(exp) + else: + exp = math.ceil(exp) + bits = math.ceil(math.log(10**-exp, 2)) + scale = 2**bits + return np.around(scale * data) / scale + + +def get_db_size(filename): + sout = subprocess.Popen( + "ls -sh %s" % filename, shell=True, stdout=subprocess.PIPE + ).stdout + line = sout.readline() + return line.split()[0].decode() + + +def bench(chunkshape, filters): + np.random.seed(1)  # to have reproducible results + filename = tempfile.NamedTemporaryFile(suffix=".h5").name + print("Doing test on the file system represented by:", filename) + + f = tb.open_file(filename, "w") + e = f.create_earray( + f.root, + "earray", + datom, + shape=(0, M), + filters=filters, + chunkshape=chunkshape, + ) + # Fill the array + t1 = clock() + for i in range(N): + # e.append([np.random.rand(M)]) # use this for less compressibility + e.append([quantize(np.random.rand(M), 6)]) + # os.system("sync") + print(f"Creation time: {clock() - t1:.3f}", end=" ") + filesize = get_db_size(filename) + filesize_bytes = Path(filename).stat().st_size + print("\t\tFile size: %d -- (%s)" % (filesize_bytes, filesize)) + + # Read in sequential mode: + e = f.root.earray + t1 = clock() + # Flush everything to disk and flush caches + # os.system("sync; echo 1 > /proc/sys/vm/drop_caches") + for row in e: + _ = row + print(f"Sequential read time: {clock() - t1:.3f}", end=" ") + + # f.close() + # return + + # Read in random mode: + i_index = np.random.randint(0, N, 128) + j_index = np.random.randint(0, M, 256) + # Flush everything to disk and flush caches + # os.system("sync; echo 1 > /proc/sys/vm/drop_caches") + + # Protection against too large chunksizes + # 4 MB + if 0 and filters.complevel and chunkshape[0] * chunkshape[1] * 8 > 2**22: + f.close() + return + + t1 = clock() + for i in i_index: + for j in j_index: + _ = e[i, j] + print(f"\tRandom read time: {clock() - t1:.3f}") + + f.close() + + +# Benchmark with different chunksizes and filters +# for complevel in (0, 1, 3, 6, 9): +for complib in (None, "zlib", "lzo", "blosc"): + # for complib in ('blosc',): + if complib: + filters = tb.Filters(complevel=5, complib=complib) + else: + filters = tb.Filters(complevel=0) + print("8<--" * 20, "\nFilters:", filters, "\n" + "-" * 80) + # for ecs in (11, 14, 17, 20, 21, 22): + for ecs in range(10, 24): + # for ecs in (19,): + chunksize = 2**ecs + chunk1 = 1 + chunk2 = chunksize // datom.itemsize + if chunk2 > M: + chunk1 = chunk2 // M + chunk2 = M + chunkshape = (chunk1, chunk2) + cs_str = str(chunksize // 1024) + " KB" + print("***** Chunksize:", cs_str, "/ Chunkshape:", chunkshape, "*****") + bench(chunkshape, filters) diff --git a/bench/plot-bar.py b/bench/plot-bar.py new file mode 100644 index 0000000..ce626fe --- /dev/null +++ b/bench/plot-bar.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# a stacked bar plot with errorbars + +from pathlib import Path + +import numpy as np +from matplotlib import pyplot as plt + +checks = [ + "open_close", + "only_open", + "full_browse", + "partial_browse", + "full_browse_attrs", + "partial_browse_attrs", + "open_group", + "open_leaf", + "total", +] +width = 0.15  # the width of the bars: can also be len(x) sequence +colors = ["r", "m", "g", "y", "b"] +ind = np.arange(len(checks))  # the x locations for the groups + + +def get_values(filename): + values = [] + for line in Path(filename).read_text().splitlines(): + if show_memory: + if line.startswith("VmData:"): + values.append(float(line.split()[1]) / 1024) + 
else: + if line.startswith("WallClock time:"): + values.append(float(line.split(":")[1])) + return values + + +def plot_bar(values, n): + global ind + if not gtotal: + # Remove the grand totals + values.pop() + if n == 0: + checks.pop() + ind = np.arange(len(checks)) + p = plt.bar(ind + width * n, values, width, color=colors[n]) + return p + + +def show_plot(bars, filenames, tit): + if show_memory: + plt.ylabel("Memory (MB)") + else: + plt.ylabel("Time (s)") + plt.title(tit) + n = len(filenames) + plt.xticks( + ind + width * n / 2, + checks, + rotation=45, + horizontalalignment="right", + fontsize=8, + ) + if not gtotal: + # loc = 'center right' + loc = "upper left" + else: + loc = "center left" + + legends = [filename[: filename.index("_")] for filename in filenames] + legends = [line.replace("-", " ") for line in legends] + plt.legend([p[0] for p in bars], legends, loc=loc) + + plt.subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2) + if outfile: + plt.savefig(outfile) + else: + plt.show() + + +if __name__ == "__main__": + + import sys + import getopt + + usage = ( + """usage: %s [-g] [-m] [-o file] [-t title] files + -g grand total + -m show memory instead of time + -o filename for output (only .png and .jpg extensions supported) + -t title of the plot + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "gmo:t:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too few parameters, abort + if len(pargs) < 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + tit = "Comparison of different PyTables versions" + gtotal = 0 + show_memory = 0 + outfile = None + + # Get the options + for option in opts: + if option[0] == "-g": + gtotal = 1 + elif option[0] == "-m": + show_memory = 1 + elif option[0] == "-o": + outfile = option[1] + elif option[0] == "-t": + tit = option[1] + + filenames = pargs + bars = [] + n = 0 + for filename in filenames: + values = get_values(filename) + print("Values-->", values) + bars.append(plot_bar(values, n)) + n += 1 + show_plot(bars, filenames, tit) diff --git a/bench/plot-comparison-lzo-zlib-ucl.gnuplot b/bench/plot-comparison-lzo-zlib-ucl.gnuplot new file mode 100644 index 0000000..f8e9576 --- /dev/null +++ b/bench/plot-comparison-lzo-zlib-ucl.gnuplot @@ -0,0 +1,26 @@ +#set term post color +set term post eps color +set xlabel "Number of rows" +set ylabel "Speed (Krow/s)" + +set linestyle 1 lw 7 +set linestyle 2 lw 7 +set linestyle 3 lw 7 +set linestyle 4 lw 7 +set logscale x + +# For small record size +set output "read-small-lzo-zlib-ucl-comparison.eps" +set tit "Selecting with small record size (16 bytes)" +pl [1000:] [0:1000] "small-nc.out" u ($1):($10) t "No compression" w linesp ls 1, \ + "small-zlib.out" u ($1):($10) t "ZLIB" w linesp ls 2, \ + "small-lzo.out" u ($1):($10) t "LZO" w linesp ls 3, \ + "small-ucl.out" u ($1):($10) t "UCL" w linesp ls 4 + +# For small record size +set output "write-small-lzo-zlib-ucl-comparison.eps" +set tit "Writing with small record size (16 bytes)" +pl [1000:] [0:500] "small-nc.out" u ($1):($5) tit "No compression" w linesp ls 1, \ + "small-zlib.out" u ($1):($5) tit "ZLIB" w linesp ls 2, \ + "small-lzo.out" u ($1):($5) tit "LZO" w linesp ls 3, \ + "small-ucl.out" u ($1):($5) tit "UCL" w linesp ls 4 diff --git a/bench/plot-comparison-psyco-lzo.gnuplot b/bench/plot-comparison-psyco-lzo.gnuplot new file mode 100644 index 0000000..346e214 --- /dev/null +++ b/bench/plot-comparison-psyco-lzo.gnuplot @@ 
-0,0 +1,27 @@ +#set term post color +set term post eps color +set xlabel "Number of rows" +set ylabel "Speed (Krow/s)" + +set linestyle 1 lw 7 +set linestyle 2 lw 7 +set linestyle 3 lw 7 +set linestyle 4 lw 7 + +# For small record size +set output "read-small-psyco-lzo-comparison.eps" +set tit "Selecting with small record size (16 bytes)" +set logscale x +pl [1000:] [0:1200] "small-psyco-lzo.out" u ($1):($10) t "Psyco & compression (LZO)" w linesp ls 2, \ + "small-psyco-nc.out" u ($1):($10) tit "Psyco & no compression" w linesp ls 3, \ + "small-lzo.out" u ($1):($10) t "No Psyco & compression (LZO)" w linesp ls 1, \ + "small-nc.out" u ($1):($10) tit "No Psyco & no compression" w linesp ls 4 + +# For small record size +set output "write-small-psyco-lzo-comparison.eps" +set tit "Writing with small record size (16 bytes)" +set logscale x +pl [1000:] [0:1000] "small-psyco-lzo.out" u ($1):($5) t "Psyco & compression (LZO)" w linesp ls 2, \ + "small-psyco-nc.out" u ($1):($5) tit "Psyco & no compression" w linesp ls 3, \ + "small-lzo.out" u ($1):($5) t "No Psyco & compression (LZO)" w linesp ls 1, \ + "small-nc.out" u ($1):($5) tit "No Psyco & no compression" w linesp ls 4 diff --git a/bench/poly.py b/bench/poly.py new file mode 100644 index 0000000..5a342e2 --- /dev/null +++ b/bench/poly.py @@ -0,0 +1,199 @@ +"""This script compares the speed of the computation of a polynomial for +different (numpy.memmap and tables.Expr) out-of-memory paradigms.""" + +from time import perf_counter as clock +from pathlib import Path + +import numpy as np +import numexpr as ne + +import tables as tb + +expr = ".25*x**3 + .75*x**2 - 1.5*x - 2"  # the polynomial to compute +N = 10 * 1000 * 1000  # the number of points to compute expression (80 MB) +step = 100 * 1000  # perform calculation in slices of `step` elements +dtype = np.dtype("f8")  # the datatype +# CHUNKSHAPE = (2**17,) +CHUNKSHAPE = None + +# Global variable for the x values for pure numpy & numexpr +x = None + +# *** The next variables do not need to be changed *** + +# Filenames for numpy.memmap +fprefix = "numpy.memmap"  # the I/O file prefix +mpfnames = [fprefix + "-x.bin", fprefix + "-r.bin"] + +# Filename for tables.Expr +h5fname = "tablesExpr.h5"  # the I/O file + +MB = 1024 * 1024  # a MegaByte + + +def print_filesize(filename, clib=None, clevel=0): + """Print some statistics about file sizes.""" + + # os.system("sync") # make sure that all data has been flushed to disk + if isinstance(filename, list): + filesize_bytes = sum(Path(fname).stat().st_size for fname in filename) + else: + filesize_bytes = Path(filename).stat().st_size + print( + f"\t\tTotal file sizes: {filesize_bytes} -- " + f"({filesize_bytes / MB:.1f} MB)", + end=" ", + ) + if clevel > 0: + print(f"(using {clib} lvl{clevel})") + else: + print() + + +def populate_x_numpy(): + """Populate the values in x axis for numpy.""" + global x + # Populate x in range [-1, 1] + x = np.linspace(-1, 1, N) + + +def populate_x_memmap(): + """Populate the values in x axis for numpy.memmap.""" + # Create container for input + x = np.memmap(mpfnames[0], dtype=dtype, mode="w+", shape=(N,)) + + # Populate x in range [-1, 1] + for i in range(0, N, step): + chunk = np.linspace((2 * i - N) / N, (2 * (i + step) - N) / N, step) + x[i : i + step] = chunk + del x  # close x memmap + + +def populate_x_tables(clib, clevel): + """Populate the values in x axis for pytables.""" + f = tb.open_file(h5fname, "w") + + # Create container for input + atom = tb.Atom.from_dtype(dtype) + filters = tb.Filters(complib=clib, 
complevel=clevel) + x = f.create_carray( + f.root, + "x", + atom=atom, + shape=(N,), + filters=filters, + chunkshape=CHUNKSHAPE, + ) + + # Populate x in range [-1, 1] + for i in range(0, N, step): + chunk = np.linspace((2 * i - N) / N, (2 * (i + step) - N) / N, step) + x[i : i + step] = chunk + f.close() + + +def compute_numpy(): + """Compute the polynomial with pure numpy.""" + _ = eval(expr) + + +def compute_numexpr(): + """Compute the polynomial with pure numexpr.""" + _ = ne.evaluate(expr) + + +def compute_memmap(): + """Compute the polynomial with numpy.memmap.""" + # Reopen inputs in read-only mode + x = np.memmap(mpfnames[0], dtype=dtype, mode="r", shape=(N,)) + # Create the array output + r = np.memmap(mpfnames[1], dtype=dtype, mode="w+", shape=(N,)) + + # Do the computation by chunks and store in output + r[:] = eval(expr)  # this is where the result is stored + # r = eval(expr) # result is stored in-memory + + del x, r  # close x and r memmap arrays + print_filesize(mpfnames) + + +def compute_tables(clib, clevel): + """Compute the polynomial with tables.Expr.""" + f = tb.open_file(h5fname, "a") + _ = f.root.x  # get the x input + # Create container for output + atom = tb.Atom.from_dtype(dtype) + filters = tb.Filters(complib=clib, complevel=clevel) + r = f.create_carray( + f.root, + "r", + atom=atom, + shape=(N,), + filters=filters, + chunkshape=CHUNKSHAPE, + ) + + # Do the actual computation and store in output + ex = tb.Expr(expr)  # parse the expression + ex.set_output(r)  # this is where the result is stored + # when commented out, the result goes in-memory + ex.eval()  # evaluate! + + f.close() + print_filesize(h5fname, clib, clevel) + + +if __name__ == "__main__": + + tb.print_versions() + + print(f"Total size for datasets: {2 * N * dtype.itemsize / MB:.1f} MB") + + # Get the compression libraries supported + # supported_clibs = [clib for clib in ("zlib", "lzo", "bzip2", "blosc") + # supported_clibs = [clib for clib in ("zlib", "lzo", "blosc") + supported_clibs = [ + clib for clib in ("blosc",) if tb.which_lib_version(clib) + ] + + # Initialization code + # for what in ["numpy", "numpy.memmap", "numexpr"]: + for what in ["numpy", "numexpr"]: + # break + print("Populating x using %s with %d points..." % (what, N)) + t0 = clock() + if what == "numpy": + populate_x_numpy() + compute = compute_numpy + elif what == "numexpr": + populate_x_numpy() + compute = compute_numexpr + elif what == "numpy.memmap": + populate_x_memmap() + compute = compute_memmap + print(f"*** Time elapsed populating: {clock() - t0:.3f}") + print(f"Computing: {expr!r} using {what}") + t0 = clock() + compute() + print(f"**************** Time elapsed computing: {clock() - t0:.3f}") + + for what in ["tables.Expr"]: + t0 = clock() + first = True  # Sentinel + for clib in supported_clibs: + # for clevel in (0, 1, 3, 6, 9): + for clevel in range(10): + # for clevel in (1,): + if not first and clevel == 0: + continue + print("Populating x using %s with %d points..." 
% (what, N)) + populate_x_tables(clib, clevel) + print(f"*** Time elapsed populating: {clock() - t0:.3f}") + print(f"Computing: {expr!r} using {what}") + t0 = clock() + compute_tables(clib, clevel) + print( + f"**************** Time elapsed computing: " + f"{clock() - t0:.3f}" + ) + first = False diff --git a/bench/postgres-search-bench.py b/bench/postgres-search-bench.py new file mode 100644 index 0000000..0fc346b --- /dev/null +++ b/bench/postgres-search-bench.py @@ -0,0 +1,258 @@ +import random +from time import perf_counter as clock + +import numpy as np + +DSN = "dbname=test port = 5435" + +# in order to always generate the same random sequence +random.seed(19) + + +def flatten(line): + """Flatten the list of tuples `line`.""" + return [x[0] for x in line] + + +def fill_arrays(start, stop): + col_i = np.arange(start, stop, dtype=np.int32) + if userandom: + col_j = np.random.uniform(0, nrows, size=[stop - start]) + else: + col_j = np.array(col_i, dtype=np.float64) + return col_i, col_j + + +# Generator to ensure pytables benchmark compatibility + + +def int_generator(nrows): + step = 1000 * 100 + j = 0 + for i in range(nrows): + if i >= step * j: + stop = (j + 1) * step + if stop > nrows:  # Seems unnecessary + stop = nrows + col_i, col_j = fill_arrays(i, stop) + j += 1 + k = 0 + yield (col_i[k], col_j[k]) + k += 1 + + +def int_generator_slow(nrows): + for i in range(nrows): + if userandom: + yield (i, float(random.randint(0, nrows))) + else: + yield (i, float(i)) + + +class Stream32: + + "Object simulating a file for reading" + + def __init__(self): + self.n = None + self.read_it = self.read_iter() + + # This does not work! It has to be converted into a regular method! + def readline(self, n=None): + for tup in int_generator(nrows): + sout = "%s\t%s\n" % tup + if n is not None and len(sout) > n: + for i in range(0, len(sout), n): + yield sout[i : i + n] + else: + yield sout + + def read_iter(self): + sout = "" + n = self.n + for tup in int_generator(nrows): + sout += "%s\t%s\n" % tup + if n is not None and len(sout) > n: + for i in range(n, len(sout), n): + rout = sout[:n] + sout = sout[n:] + yield rout + yield sout + + def read(self, n=None): + self.n = n + try: + str_ = next(self.read_it) + except StopIteration: + str_ = "" + return str_ + + +def open_db(filename, remove=0): + if not filename: + con = sqlite.connect(DSN) + else: + con = sqlite.connect(filename) + cur = con.cursor() + return con, cur + + +def create_db(filename, nrows): + con, cur = open_db(filename, remove=1) + try: + cur.execute("create table ints(i integer, j double precision)") + except Exception: + con.rollback() + cur.execute("DROP TABLE ints") + cur.execute("create table ints(i integer, j double precision)") + con.commit() + con.set_isolation_level(2) + t1 = clock() + st = Stream32() + cur.copy_from(st, "ints") + # In case of postgres, the speeds of generator and loop are similar + # cur.executemany("insert into ints values (%s,%s)", int_generator(nrows)) + # for i in xrange(nrows): + # cur.execute("insert into ints values (%s,%s)", (i, float(i))) + con.commit() + ctime = clock() - t1 + if verbose: + print(f"insert time: {ctime:.5f}") + print(f"Krows/s: {nrows / 1000 / ctime:.5f}") + close_db(con, cur) + + +def index_db(filename): + con, cur = open_db(filename) + t1 = clock() + cur.execute("create index ij on ints(j)") + con.commit() + itime = clock() - t1 + if verbose: + print(f"index time: {itime:.5f}") + print(f"Krows/s: {nrows / itime:.5f}") + # Close the DB + close_db(con, cur) + + +def query_db(filename, rng): + con, cur = 
open_db(filename) + t1 = clock() + ntimes = 10 + for i in range(ntimes): + # between clause does not seem to take advantage of indexes + # cur.execute("select j from ints where j between %s and %s" % \ + cur.execute( + "select i from ints where j >= %s and j <= %s" + % + # cur.execute("select i from ints where i >= %s and i <= + # %s" % + (rng[0] + i, rng[1] + i) + ) + results = cur.fetchall() + con.commit() + qtime = (clock() - t1) / ntimes + if verbose: + print(f"query time: {qtime:.5f}") + print(f"Mrows/s: {nrows / 1000 / qtime:.5f}") + results = sorted(flatten(results)) + print(results) + close_db(con, cur) + + +def close_db(con, cur): + cur.close() + con.close() + + +if __name__ == "__main__": + import sys + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = ( + """usage: %s [-v] [-p] [-m] [-i] [-q] [-c] [-R range] [-n nrows] file + -v verbose + -p use "psyco" if available + -m use random values to fill the table + -q do query + -c create the database + -i index the table + -2 use sqlite2 (default is sqlite3) + -R select a range in a field in the form "start,stop" (def "0,10") + -n sets the number of rows (in krows) in each table + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpmiqc2R:n:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + usepsyco = 0 + userandom = 0 + docreate = 0 + createindex = 0 + doquery = 0 + sqlite_version = "3" + rng = [0, 10] + nrows = 1 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + elif option[0] == "-p": + usepsyco = 1 + elif option[0] == "-m": + userandom = 1 + elif option[0] == "-i": + createindex = 1 + elif option[0] == "-q": + doquery = 1 + elif option[0] == "-c": + docreate = 1 + elif option[0] == "-2": + sqlite_version = "2" + elif option[0] == "-R": + rng = [int(i) for i in option[1].split(",")] + elif option[0] == "-n": + nrows = int(option[1]) + + # Catch the database file passed as the last argument + filename = pargs[0] + + # if sqlite_version == "2": + # import sqlite + # else: + # from pysqlite2 import dbapi2 as sqlite + import psycopg2 as sqlite + + if verbose: + # print "pysqlite version:", sqlite.version + if userandom: + print("using random values") + + if docreate: + if verbose: + print("writing %s krows" % nrows) + if psyco_imported and usepsyco: + psyco.bind(create_db) + nrows *= 1000 + create_db(filename, nrows) + + if createindex: + index_db(filename) + + if doquery: + query_db(filename, rng) diff --git a/bench/postgres_backend.py b/bench/postgres_backend.py new file mode 100644 index 0000000..792a63f --- /dev/null +++ b/bench/postgres_backend.py @@ -0,0 +1,171 @@ +import subprocess + +import psycopg2 as db2 +from indexed_search import DB + +CLUSTER_NAME = "base" +DATA_DIR = "/scratch2/postgres/data/%s" % CLUSTER_NAME +# DATA_DIR = "/var/lib/pgsql/data/%s" % CLUSTER_NAME +DSN = "dbname=%s port=%s" +CREATE_DB = "createdb %s" +DROP_DB = "dropdb %s" +TABLE_NAME = "intsfloats" +PORT = 5432 + + +class StreamChar: + "Object simulating a file for reading" + + def __init__(self, db): + self.db = db + self.nrows = db.nrows + self.step = db.step + self.read_it = self.read_iter() + + def values_generator(self): + j = 0 + for i in range(self.nrows): + if i >= j * self.step: + stop = (j + 1) * self.step + if stop > self.nrows: + stop = self.nrows + arr_i4, arr_f8 = self.db.fill_arrays(i, stop) + j += 1 + k = 0 + yield (arr_i4[k], arr_i4[k], arr_f8[k], arr_f8[k]) + k 
+= 1
+
+    def read_iter(self):
+        sout = ""
+        n = self.nbytes
+        for tup in self.values_generator():
+            sout += "%s\t%s\t%s\t%s\n" % tup
+            if n is not None and len(sout) > n:
+                for i in range(n, len(sout), n):
+                    rout = sout[:n]
+                    sout = sout[n:]
+                    yield rout
+        yield sout
+
+    def read(self, n=None):
+        self.nbytes = n
+        try:
+            str_ = next(self.read_it)
+        except StopIteration:
+            str_ = ""
+        return str_
+
+    # required by the psycopg2 driver, but not used
+    def readline(self):
+        pass
+
+
+class PostgresDB(DB):
+
+    def __init__(self, nrows, rng, userandom):
+        DB.__init__(self, nrows, rng, userandom)
+        self.port = PORT
+
+    def flatten(self, line):
+        """Flatten a list of one-element tuples."""
+        return [x[0] for x in line]
+        # return map(lambda x: x[col], line)
+
+    # Overloads the method in DB class
+    def get_db_size(self):
+        sout = subprocess.Popen(
+            "sudo du -s %s" % DATA_DIR, shell=True, stdout=subprocess.PIPE
+        ).stdout
+        line = sout.readline()  # first (and only) line of the `du -s` output
+        return int(line.split()[0])
+
+    def open_db(self, remove=0):
+        if remove:
+            sout = subprocess.Popen(
+                DROP_DB % self.filename, shell=True, stdout=subprocess.PIPE
+            ).stdout
+            for line in sout:
+                print(line)
+            sout = subprocess.Popen(
+                CREATE_DB % self.filename, shell=True, stdout=subprocess.PIPE
+            ).stdout
+            for line in sout:
+                print(line)
+
+        print("Processing database:", self.filename)
+        con = db2.connect(DSN % (self.filename, self.port))
+        self.cur = con.cursor()
+        return con
+
+    def create_table(self, con):
+        self.cur.execute(
+            """create table %s(
+            col1 integer,
+            col2 integer,
+            col3 double precision,
+            col4 double precision)"""
+            % TABLE_NAME
+        )
+        con.commit()
+
+    def fill_table(self, con):
+        st = StreamChar(self)
+        self.cur.copy_from(st, TABLE_NAME)
+        con.commit()
+
+    def index_col(self, con, colname, optlevel, idxtype, verbose):
+        self.cur.execute(
+            "create index {} on {}({})".format(
+                colname + "_idx", TABLE_NAME, colname
+            )
+        )
+        con.commit()
+
+    def do_query_simple(self, con, column, base):
+        self.cur.execute(
+            "select sum(%s) from %s where %s >= %s and %s <= %s"
+            % (
+                column,
+                TABLE_NAME,
+                column,
+                base + self.rng[0],
+                column,
+                base + self.rng[1],
+            )
+        )
+        # "select * from %s where %s >= %s and %s <= %s" % \
+        # (TABLE_NAME,
+        #  column, base+self.rng[0],
+        #  column, base+self.rng[1]))
+        # results = self.flatten(self.cur.fetchall())
+        results = self.cur.fetchall()
+        return results
+
+    def do_query(self, con, column, base, *unused):
+        d = (self.rng[1] - self.rng[0]) / 2
+        inf1 = int(self.rng[0] + base)
+        sup1 = int(self.rng[0] + d + base)
+        inf2 = self.rng[0] + base * 2
+        sup2 = self.rng[0] + d + base * 2
+        # print "lims-->", inf1, inf2, sup1, sup2
+        condition = "((%s>=%s) and (%s<%s)) or ((col2>%s) and (col2<%s))"
+        # condition = "((col3>=%s) and (col3<%s)) or ((col1>%s) and (col1<%s))"
+        condition += " and ((col1+3.1*col2+col3*col4) > 3)"
+        # condition += " and (sqrt(col1^2+col2^2+col3^2+col4^2) > .1)"
+        condition = condition % (column, inf2, column, sup2, inf1, sup1)
+        # print "condition-->", condition
+        self.cur.execute(
+            # "select sum(%s) from %s where %s" %
+            "select %s from %s where %s"
+            % (column, TABLE_NAME, condition)
+        )
+        # results = self.flatten(self.cur.fetchall())
+        results = self.cur.fetchall()
+        # print "results-->", results
+        # return results
+        return len(results)
+
+    def close_db(self, con):
+        self.cur.close()
+        con.close()
diff --git a/bench/pytables-search-bench.py b/bench/pytables-search-bench.py
new file mode 100644
index 0000000..52720e9
--- /dev/null
+++ b/bench/pytables-search-bench.py
@@ -0,0 +1,235 @@
+import random
+from time import perf_counter as clock
+from pathlib import Path
+
+import numpy as np
+
+import tables as tb
+
+# in order to always generate the same random sequence
+random.seed(19)
+np.random.seed((19, 20))
+
+
+def open_db(filename, remove=0):
+    if remove and Path(filename).is_file():
+        Path(filename).unlink()
+    con = tb.open_file(filename, "a")
+    return con
+
+
+def create_db(filename, nrows):
+
+    class Record(tb.IsDescription):
+        col1 = tb.Int32Col()
+        col2 = tb.Int32Col()
+        col3 = tb.Float64Col()
+        col4 = tb.Float64Col()
+
+    con = open_db(filename, remove=1)
+    table = con.create_table(
+        con.root, "table", Record, filters=filters, expectedrows=nrows
+    )
+    table.indexFilters = filters
+    step = 1000 * 100
+    scale = 0.1
+    t1 = clock()
+    j = 0
+    for i in range(0, nrows, step):
+        stop = (j + 1) * step
+        if stop > nrows:
+            stop = nrows
+        arr_f8 = np.arange(i, stop, dtype=np.float64)
+        arr_i4 = np.arange(i, stop, dtype=np.int32)
+        if userandom:
+            arr_f8 += np.random.normal(0, stop * scale, size=[stop - i])
+            arr_i4 = np.array(arr_f8, dtype=np.int32)
+        recarr = np.rec.fromarrays([arr_i4, arr_i4, arr_f8, arr_f8])
+        table.append(recarr)
+        j += 1
+    table.flush()
+    ctime = clock() - t1
+    if verbose:
+        print(f"insert time: {ctime:.5f}")
+        print(f"Krows/s: {nrows / 1000 / ctime:.5f}")
+    index_db(table)
+    close_db(con)
+
+
+def index_db(table):
+    t1 = clock()
+    table.cols.col2.create_index()
+    itime = clock() - t1
+    if verbose:
+        print(f"index time (int): {itime:.5f}")
+        print(f"Krows/s: {nrows / 1000 / itime:.5f}")
+    t1 = clock()
+    table.cols.col4.create_index()
+    itime = clock() - t1
+    if verbose:
+        print(f"index time (float): {itime:.5f}")
+        print(f"Krows/s: {nrows / 1000 / itime:.5f}")
+
+
+def query_db(filename, rng):
+    con = open_db(filename)
+    table = con.root.table
+    # Query for integer columns
+    # Query for non-indexed column
+    if not doqueryidx:
+        t1 = clock()
+        ntimes = 10
+        for i in range(ntimes):
+            results = [
+                r["col1"]
+                for r in table.where(
+                    "(lo <= col1) & (col1 <= hi)",
+                    {"lo": rng[0] + i, "hi": rng[1] + i},
+                )
+            ]
+        qtime = (clock() - t1) / ntimes
+        if verbose:
+            print(f"query time (int, not indexed): {qtime:.5f}")
+            print(f"Krows/s: {nrows / 1000 / qtime:.5f}")
+            print(results)
+    # Query for indexed column
+    t1 = clock()
+    ntimes = 10
+    for i in range(ntimes):
+        results = [
+            r["col1"]
+            for r in table.where(
+                "(lo <= col2) & (col2 <= hi)",
+                {"lo": rng[0] + i, "hi": rng[1] + i},
+            )
+        ]
+    qtime = (clock() - t1) / ntimes
+    if verbose:
+        print(f"query time (int, indexed): {qtime:.5f}")
+        print(f"Krows/s: {nrows / 1000 / qtime:.5f}")
+        print(results)
+    # Query for floating columns
+    # Query for non-indexed column
+    if not doqueryidx:
+        t1 = clock()
+        ntimes = 10
+        for i in range(ntimes):
+            results = [
+                r["col3"]
+                for r in table.where(
+                    "(lo <= col3) & (col3 <= hi)",
+                    {"lo": rng[0] + i, "hi": rng[1] + i},
+                )
+            ]
+        qtime = (clock() - t1) / ntimes
+        if verbose:
+            print(f"query time (float, not indexed): {qtime:.5f}")
+            print(f"Krows/s: {nrows / 1000 / qtime:.5f}")
+            print(results)
+    # Query for indexed column
+    t1 = clock()
+    ntimes = 10
+    for i in range(ntimes):
+        results = [
+            r["col3"]
+            for r in table.where(
+                "(lo <= col4) & (col4 <= hi)",
+                {"lo": rng[0] + i, "hi": rng[1] + i},
+            )
+        ]
+    qtime = (clock() - t1) / ntimes
+    if verbose:
+        print(f"query time (float, indexed): {qtime:.5f}")
+        print(f"Krows/s: {nrows / 1000 / qtime:.5f}")
+        print(results)
+    close_db(con)
+
+
+def close_db(con):
+    con.close()
+
+
+if __name__ == "__main__":
+    import sys
+    import getopt
+
+    try:
+        import psyco
+
+        psyco_imported = 1
+    except Exception:
+        psyco_imported = 0
+
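+    # The query loop above times PyTables "in-kernel" selections: a string
+    # condition handed to Table.where() is compiled by numexpr and applied
+    # chunk by chunk instead of materializing every row in Python.  A
+    # minimal sketch of that pattern follows; `timed_query` and its
+    # arguments are illustrative names only, not used by the command-line
+    # driver below.
+    def timed_query(tbl, lo, hi, ntimes=10):
+        """Illustrative helper: average time of an in-kernel range query."""
+        t0 = clock()
+        for i in range(ntimes):
+            res = [
+                r["col1"]
+                for r in tbl.where(
+                    "(lo <= col2) & (col2 <= hi)",
+                    {"lo": lo + i, "hi": hi + i},
+                )
+            ]
+        return (clock() - t0) / ntimes, len(res)
+
+    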
usage = ( + """usage: %s [-v] [-p] [-m] [-c] [-q] [-i] [-z complevel] [-l complib] [-R range] [-n nrows] file + -v verbose + -p use "psyco" if available + -m use random values to fill the table + -q do a query (both indexed and non-indexed version) + -i do a query (exclude non-indexed version) + -c create the database + -z compress with zlib (no compression by default) + -l use complib for compression (zlib used by default) + -R select a range in a field in the form "start,stop" (def "0,10") + -n sets the number of rows (in krows) in each table + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpmcqiz:l:R:n:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + usepsyco = 0 + userandom = 0 + docreate = 0 + docompress = 0 + complib = "zlib" + doquery = 0 + doqueryidx = 0 + rng = [0, 10] + nrows = 1 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + elif option[0] == "-p": + usepsyco = 1 + elif option[0] == "-m": + userandom = 1 + elif option[0] == "-c": + docreate = 1 + createindex = 1 + elif option[0] == "-q": + doquery = 1 + elif option[0] == "-i": + doqueryidx = 1 + elif option[0] == "-z": + docompress = int(option[1]) + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-R": + rng = [int(i) for i in option[1].split(",")] + elif option[0] == "-n": + nrows = int(option[1]) + + # Catch the hdf5 file passed as the last argument + filename = pargs[0] + + # The filters chosen + filters = tb.Filters(complevel=docompress, complib=complib) + + if verbose: + print("pytables version:", tb.__version__) + if userandom: + print("using random values") + if doqueryidx: + print("doing indexed queries only") + + if docreate: + if verbose: + print("writing %s krows" % nrows) + if psyco_imported and usepsyco: + psyco.bind(create_db) + nrows *= 1000 + create_db(filename, nrows) + + if doquery: + query_db(filename, rng) diff --git a/bench/pytables_backend.py b/bench/pytables_backend.py new file mode 100644 index 0000000..e45d772 --- /dev/null +++ b/bench/pytables_backend.py @@ -0,0 +1,222 @@ +from pathlib import Path + +from indexed_search import DB + +import tables as tb + + +class PyTablesDB(DB): + + def __init__( + self, + nrows, + rng, + userandom, + datadir, + docompress=0, + complib="zlib", + kind="medium", + optlevel=6, + ): + DB.__init__(self, nrows, rng, userandom) + self.tprof = [] + # Specific part for pytables + self.docompress = docompress + self.complib = complib + # Complete the filename + self.filename = "pro-" + self.filename + self.filename += "-" + "O%s" % optlevel + self.filename += "-" + kind + if docompress: + self.filename += "-" + complib + str(docompress) + self.datadir = datadir + path = Path(self.datadir) + if not path.is_dir(): + if not path.is_absolute(): + dir_path = Path(".") / self.datadir + else: + dir_path = Path(self.datadir) + dir_path.mkdir(parents=True, exist_ok=True) + self.datadir = dir_path + print(f"Created {self.datadir}.") + self.filename = self.datadir / f"{self.filename}.h5" + # The chosen filters + self.filters = tb.Filters( + complevel=self.docompress, complib=self.complib, shuffle=1 + ) + print("Processing database:", self.filename) + + def open_db(self, remove=0): + if remove and Path(self.filename).is_file(): + Path(self.filename).unlink() + con = tb.open_file(self.filename, "a") + return con + + def close_db(self, con): + # Remove first the table_cache attribute if it exists + if hasattr(self, "table_cache"): + del self.table_cache 
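+        # Note: `table_cache` (set lazily in do_query() below) keeps the
+        # Table node plus the Column instances stored in `self.condvars`;
+        # dropping it first ensures no references to nodes of a closed
+        # file survive the close.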
+        con.close()
+
+    def create_table(self, con):
+        class Record(tb.IsDescription):
+            col1 = tb.Int32Col()
+            col2 = tb.Int32Col()
+            col3 = tb.Float64Col()
+            col4 = tb.Float64Col()
+
+        con.create_table(
+            con.root,
+            "table",
+            Record,
+            filters=self.filters,
+            expectedrows=self.nrows,
+        )
+
+    def fill_table(self, con):
+        "Fills the table"
+        table = con.root.table
+        j = 0
+        for i in range(0, self.nrows, self.step):
+            stop = (j + 1) * self.step
+            if stop > self.nrows:
+                stop = self.nrows
+            arr_i4, arr_f8 = self.fill_arrays(i, stop)
+            # recarr = records.fromarrays([arr_i4, arr_i4, arr_f8, arr_f8])
+            # table.append(recarr)
+            table.append([arr_i4, arr_i4, arr_f8, arr_f8])
+            j += 1
+        table.flush()
+
+    def index_col(self, con, column, kind, optlevel, verbose):
+        col = getattr(con.root.table.cols, column)
+        tmp_dir = self.datadir / "scratch2"
+        tmp_dir.mkdir(parents=True, exist_ok=True)
+        col.create_index(
+            kind=kind,
+            optlevel=optlevel,
+            filters=self.filters,
+            tmp_dir=tmp_dir,
+            _verbose=verbose,
+            _blocksizes=None,
+        )
+
+        # _blocksizes=(2**27, 2**22, 2**15, 2**7))
+        # _blocksizes=(2**27, 2**22, 2**14, 2**6))
+        # _blocksizes=(2**27, 2**20, 2**13, 2**5),
+        # _testmode=True)
+
+    def do_query(self, con, column, base, inkernel):
+        if True:
+            if not hasattr(self, "table_cache"):
+                self.table_cache = table = con.root.table
+                self.colobj = getattr(table.cols, column)
+                # self.colobj = getattr(table.cols, 'col1')
+                self.condvars = {
+                    "col": self.colobj,
+                    "col1": table.cols.col1,
+                    "col2": table.cols.col2,
+                    "col3": table.cols.col3,
+                    "col4": table.cols.col4,
+                }
+            table = self.table_cache
+            colobj = self.colobj
+        else:
+            table = con.root.table
+            colobj = getattr(table.cols, column)
+            self.condvars = {
+                "col": colobj,
+                "col1": table.cols.col1,
+                "col2": table.cols.col2,
+                "col3": table.cols.col3,
+                "col4": table.cols.col4,
+            }
+        self.condvars["inf"] = self.rng[0] + base
+        self.condvars["sup"] = self.rng[1] + base
+        # For queries that can use two indexes instead of just one
+        d = (self.rng[1] - self.rng[0]) / 2
+        inf1 = int(self.rng[0] + base)
+        sup1 = int(self.rng[0] + d + base)
+        inf2 = self.rng[0] + base * 2
+        sup2 = self.rng[0] + d + base * 2
+        self.condvars["inf1"] = inf1
+        self.condvars["sup1"] = sup1
+        self.condvars["inf2"] = inf2
+        self.condvars["sup2"] = sup2
+        # condition = "(inf == col2)"
+        # condition = "(inf==col2) & (col4==sup)"
+        # condition = "(inf==col2) | (col4==sup)"
+        # condition = "(inf==col2) | (col2==sup)"
+        # condition = "(inf==col2) & (col3==sup)"
+        # condition = "((inf==col2) & (sup==col4)) & (col3==sup)"
+        # condition = "((inf==col1) & (sup==col4)) & (col3==sup)"
+        # condition = "(inf<=col1) & (col3<sup)"
+        # print "lims-->", inf1, inf2, sup1, sup2
+        condition = "((inf2<=col) & (col<sup2)) | ((inf1<col2) & (col2<sup1))"
+        # print "condition-->", c['inf'], c['sup'], c['inf2'], c['sup2']
+        ncoords = 0
+        if colobj.is_indexed:
+            results = [
+                r[column] for r in table.where(condition, self.condvars)
+            ]
+            # coords = table.get_where_list(condition, self.condvars)
+            # results = table.read_coordinates(coords, field=column)
+
+            # results = table.read_where(condition, self.condvars, field=column)
+
+        elif inkernel:
+            print("Performing in-kernel query")
+            results = [
+                r[column] for r in table.where(condition, self.condvars)
+            ]
+            # coords = [r.nrow for r in table.where(condition, self.condvars)]
+            # results = table.read_coordinates(coords)
+            # for r in table.where(condition, self.condvars):
+            #     var = r[column]
+            #     ncoords += 1
+        else:
+            # coords = [r.nrow for r in table
+            #           if (self.rng[0]+base <= r[column] <= self.rng[1]+base)]
+            # results = 
table.read_coordinates(coords) + print("Performing regular query") + results = [ + r[column] + for r in table + if ( + ((inf2 <= r["col4"]) and (r["col4"] < sup2)) + or ((inf1 < r["col2"]) and (r["col2"] < sup1)) + and ( + (r["col1"] + 3.1 * r["col2"] + r["col3"] * r["col4"]) + > 3 + ) + ) + ] + + ncoords = len(results) + + # return coords + # print "results-->", results + # return results + return ncoords + # self.tprof.append( self.colobj.index.tprof ) + # return ncoords, self.tprof diff --git a/bench/query_meteo_data.py b/bench/query_meteo_data.py new file mode 100644 index 0000000..866869a --- /dev/null +++ b/bench/query_meteo_data.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +# Benchmark the times reading large datasets with Blosc and Blosc2 filters. + +import sys +from time import time + +import numpy as np +import pandas as pd + +import tables as tb + + +def time_inkernel(table_blosc): + t1 = time() + res = [ + x["precip"] + for x in table_blosc.where( + "(lat > 50) & (20 <= lon) & (lon < 50) & (time < 10)" + ) + ] + # res = [x['precip'] for x in table_blosc.where("(time < 10)")] + print(len(res)) + return time() - t1 + + +def time_read(table): + n_reads = 10_000 + t0 = time() + idxs_to_read = np.random.randint(0, table.nrows, n_reads) + # print(f"Time to create indexes: {time() - t0:.3f}s") + + print(f"Randomly reading {n_reads // 1_000} Krows...", end="") + t0 = time() + for i in idxs_to_read: + _ = table[i] + t = time() - t0 + print(f"\t{t:.3f}s ({t / n_reads * 1e6:.1f} us/read)") + + print(f"Querying {table.nrows // 1000_000_000} Grows...", end="") + t0 = time() + _ = [x["precip"] for x in table.where("(time < 10)")] + t = time() - t0 + print( + f"\t\t{t:.3f}s ({table.nrows * table.dtype.itemsize / t / 2**30:.1f} GB/s)" + ) + + +def time_pandas(df): + t1 = time() + res = df.query("(lat > 50) & (20 <= lon) & (lon < 50) & (time < 10)")[ + "precip" + ] + print(len(res)) + return time() - t1 + + +def pandas_create_df(): + f = tb.open_file("wblosc_table.h5", "r") + df = pd.DataFrame(f.root.table_blosc[:]) + f.close() + return df + + +def inkernel_blosc2_blosclz(table): + print( + f"Time to read 6 inkernel queries with Blosc2 (blosclz): {time_inkernel(table):.3f} sec" + ) + + +def inkernel_blosc2_lz4(table): + print( + f"Time to read 6 inkernel queries with Blosc2 (lz4): {time_inkernel(table):.3f} sec" + ) + + +def pandas_query_numexpr(df): + print( + f"Time to perform 6 pandas+numexpr queries: {time_pandas(df):.3f} sec" + ) + + +f = tb.open_file(sys.argv[1]) +table = f.root.table_blosc +time_read(table) +f.close() + +# df = pandas_create_df() +# pandas_query_numexpr(df) diff --git a/bench/read-table.py b/bench/read-table.py new file mode 100644 index 0000000..233a925 --- /dev/null +++ b/bench/read-table.py @@ -0,0 +1,57 @@ +import sys +from time import time + +import numpy as np + +import tables as tb + +N = 200 +NBUNCH = 100_000_000 +NREADS = 100_000 +filename = "read-table.h5" + + +class Particle(tb.IsDescription): + lati = tb.Int32Col() + longi = tb.Int32Col() + pressure = tb.Float32Col() + temperature = tb.Float32Col() + + +if len(sys.argv) > 1 and sys.argv[1] == "w": + # Open a file in "w"rite mode + print(f"Creating {filename} with {NBUNCH * N / 1_000_000} Mrows...") + t0 = time() + fileh = tb.open_file(filename, mode="w") + # Create a new table in newgroup group + table = fileh.create_table( + fileh.root, + "table", + Particle, + "A table", + tb.Filters(complevel=1, complib="blosc2"), + expectedrows=NBUNCH * N, + ) + # A bunch of particles + particles = np.zeros(NBUNCH, 
dtype=table.dtype) + + # Fill the table with N chunks of particles + for i in range(N): + table.append(particles) + table.flush() + print(f"Time to create: {time() - t0:.3f}s") +else: + fileh = tb.open_file(filename) + table = fileh.root.table + +t0 = time() +idxs_to_read = np.random.randint(0, NBUNCH, NREADS) +print(f"Time to create indexes: {time() - t0:.3f}s") + +print(f"Reading {NREADS / 1_000} Krows...") +t0 = time() +for i in idxs_to_read: + row = table[i] +print(f"Time to read: {time() - t0:.3f}s") + +fileh.close() diff --git a/bench/recarray2-test.py b/bench/recarray2-test.py new file mode 100644 index 0000000..089423c --- /dev/null +++ b/bench/recarray2-test.py @@ -0,0 +1,113 @@ +import sys +from time import perf_counter as clock +from pathlib import Path + +import numpy as np +import recarray +import chararray +import recarray2 # This is my modified version + +usage = ( + """usage: %s recordlength + Set recordlength to 1000 at least to obtain decent figures! +""" + % sys.argv[0] +) + +try: + reclen = int(sys.argv[1]) +except Exception: + print(usage) + sys.exit() + +delta = 0.000_001 + +# Creation of recarrays objects for test +x1 = np.array(np.arange(reclen)) +x2 = chararray.array(None, itemsize=7, shape=reclen) +x3 = np.array(np.arange(reclen, reclen * 3, 2), np.float64) +r1 = recarray.fromarrays([x1, x2, x3], names="a,b,c") +r2 = recarray2.fromarrays([x1, x2, x3], names="a,b,c") + +print("recarray shape in test ==>", r2.shape) + +print("Assignment in recarray original") +print("-------------------------------") +t1 = clock() +for row in range(reclen): + # r1.field("b")[row] = "changed" + r1.field("c")[row] = float(row**2) +t2 = clock() +origtime = t2 - t1 +print(f"Assign time: {origtime:.3f} Rows/s: {reclen / (origtime + delta):.0f}") +# print "Field b on row 2 after re-assign:", r1.field("c")[2] +print() + +print("Assignment in recarray modified") +print("-------------------------------") +t1 = clock() +for row in range(reclen): + rec = r2._row(row) # select the row to be changed + # rec.b = "changed" # change the "b" field + rec.c = float(row**2) # Change the "c" field +t2 = clock() +ttime = t2 - t1 +print( + f"Assign time: {ttime:.3f} Rows/s: {reclen / (ttime + delta):.0f}", end=" " +) +print(f" Speed-up: {origtime / ttime:.3f}") +# print "Field b on row 2 after re-assign:", r2.field("c")[2] +print() + +print("Selection in recarray original") +print("------------------------------") +t1 = clock() +for row in range(reclen): + rec = r1[row] + if rec.field("a") < 3: + print("This record pass the cut ==>", rec.field("c"), "(row", row, ")") +t2 = clock() +origtime = t2 - t1 +print( + f"Select time: {origtime:.3f}, Rows/s: {reclen / (origtime + delta):.0f}" +) +print() + +print("Selection in recarray modified") +print("------------------------------") +t1 = clock() +for row in range(reclen): + rec = r2._row(row) + if rec.a < 3: + print("This record pass the cut ==>", rec.c, "(row", row, ")") +t2 = clock() +ttime = t2 - t1 +print( + f"Select time: {ttime:.3f} Rows/s: {reclen / (ttime + delta):.0f}", end=" " +) +print(f" Speed-up: {origtime / ttime:.3f}") +print() + +print("Printing in recarray original") +print("------------------------------") +with Path("test.out").open("w") as f: + t1 = clock() + f.write(str(r1)) + t2 = clock() + origtime = t2 - t1 +Path("test.out").unlink() +print(f"Print time: {origtime:.3f} Rows/s: {reclen / (origtime + delta):.0f}") +print() +print("Printing in recarray modified") +print("------------------------------") +with Path("test2.out").open("w") as 
f: + t1 = clock() + f.write(str(r2)) + t2 = clock() + ttime = t2 - t1 +Path("test2.out").unlink() +print( + f"Print time: {ttime:.3f} Rows/s: {reclen / (ttime + delta):.0f}", end=" " +) +print(f" Speed-up: {origtime / ttime:.3f}") +print() diff --git a/bench/search-bench-plot.py b/bench/search-bench-plot.py new file mode 100644 index 0000000..4945467 --- /dev/null +++ b/bench/search-bench-plot.py @@ -0,0 +1,172 @@ +from matplotlib import pyplot as plt + +import tables as tb + + +def get_values(filename, complib=""): + f = tb.open_file(filename) + nrows = f.root.small.create_best.cols.nrows[:] + corrected_sizes = nrows / 10**6 + if mb_units: + corrected_sizes = 16 * nrows / 10**6 + if insert: + values = corrected_sizes / f.root.small.create_best.cols.tfill[:] + if table_size: + values = f.root.small.create_best.cols.fsize[:] / nrows + if query: + values = ( + corrected_sizes + / f.root.small.search_best.inkernel.int.cols.time1[:] + ) + if query_cache: + values = ( + corrected_sizes + / f.root.small.search_best.inkernel.int.cols.time2[:] + ) + + f.close() + return nrows, values + + +def show_plot(plots, yaxis, legends, gtitle): + plt.xlabel("Number of rows") + plt.ylabel(yaxis) + plt.xlim(10**3, 10**8) + plt.title(gtitle) + plt.grid(True) + + # legends = [f[f.find('-'):f.index('.out')] for f in filenames] + # legends = [l.replace('-', ' ') for l in legends] + if table_size: + plt.legend([p[0] for p in plots], legends, loc="upper right") + else: + plt.legend([p[0] for p in plots], legends, loc="upper left") + + # subplots_adjust(bottom=0.2, top=None, wspace=0.2, hspace=0.2) + if outfile: + plt.savefig(outfile) + else: + plt.show() + + +if __name__ == "__main__": + + import sys + import getopt + + usage = ( + """usage: %s [-o file] [-t title] [--insert] [--table-size] [--query] [--query-cache] [--MB-units] files + -o filename for output (only .png and .jpg extensions supported) + -t title of the plot + --insert -- Insert time for table + --table-size -- Size of table + --query -- Time for querying the integer column + --query-cache -- Time for querying the integer (cached) + --MB-units -- Express speed in MB/s instead of MRows/s + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt( + sys.argv[1:], + "o:t:", + [ + "insert", + "table-size", + "query", + "query-cache", + "MB-units", + ], + ) + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + progname = sys.argv[0] + args = sys.argv[1:] + + # if we pass too few parameters, abort + if len(pargs) < 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + outfile = None + insert = 0 + table_size = 0 + query = 0 + query_cache = 0 + mb_units = 0 + yaxis = "No axis name" + tit = None + gtitle = "Please set a title!" 
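+    # Units recap: get_values() divides row counts by 10**6, so speeds plot
+    # as MRows/s; with --MB-units the small-record row size of 16 bytes
+    # turns that into 16 * nrows / 10**6 MB per measured second.
+    # Illustrative numbers: 10**6 rows filled in 0.5 s plot as 2 MRows/s,
+    # or 32 MB/s with --MB-units.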
+
+    # Get the options
+    for option in opts:
+        if option[0] == "-o":
+            outfile = option[1]
+        elif option[0] == "-t":
+            tit = option[1]
+        elif option[0] == "--insert":
+            insert = 1
+            yaxis = "MRows/s"
+            gtitle = "Writing with small (16 bytes) record size"
+        elif option[0] == "--table-size":
+            table_size = 1
+            yaxis = "Bytes/row"
+            gtitle = (
+                "Disk space taken by a record (original record size: "
+                "16 bytes)"
+            )
+        elif option[0] == "--query":
+            query = 1
+            yaxis = "MRows/s"
+            gtitle = (
+                "Selecting with small (16 bytes) record size (file not "
+                "in cache)"
+            )
+        elif option[0] == "--query-cache":
+            query_cache = 1
+            yaxis = "MRows/s"
+            gtitle = (
+                "Selecting with small (16 bytes) record size (file in "
+                "cache)"
+            )
+        elif option[0] == "--MB-units":
+            mb_units = 1
+
+    filenames = pargs
+
+    if mb_units and yaxis == "MRows/s":
+        yaxis = "MB/s"
+
+    if tit:
+        gtitle = tit
+
+    plots = []
+    legends = []
+    for filename in filenames:
+        plegend = filename[filename.find("cl-") + 3 : filename.index(".h5")]
+        plegend = plegend.replace("-", " ")
+        xval, yval = get_values(filename, "")
+        print(f"Values for {filename} --> {xval}, {yval}")
+        # plots.append(loglog(xval, yval, linewidth=5))
+        plots.append(plt.semilogx(xval, yval, linewidth=4))
+        legends.append(plegend)
+    if 0:  # To insert simulated data, if desired...
+        xval = [
+            1000,
+            10_000,
+            100_000,
+            1_000_000,
+            10_000_000,
+            100_000_000,
+            1_000_000_000,
+        ]
+        # yval = [0.003, 0.005, 0.02, 0.06, 1.2,
+        #         40, 210]
+        yval = [0.0009, 0.0011, 0.0022, 0.005, 0.02, 0.2, 5.6]
+        plots.append(plt.loglog(xval, yval, linewidth=5))
+        legends.append("PyTables Std")
+    show_plot(plots, yaxis, legends, gtitle)
diff --git a/bench/search-bench-rnd.sh b/bench/search-bench-rnd.sh
new file mode 100755
index 0000000..7c2d015
--- /dev/null
+++ b/bench/search-bench-rnd.sh
@@ -0,0 +1,122 @@
+#!/bin/sh
+# I don't know why, but the /usr/bin/python2.3 from Debian is about 30% slower than my own compiled version! 
2004-08-18 +python="/usr/local/bin/python2.3 -O" + +writedata () { + nrows=$1 + bfile=$2 + worst=$3 + psyco=$4 + if [ "$shuffle" = "1" ]; then + shufflef="-S" + else + shufflef="" + fi + cmd="${python} search-bench.py -R ${worst} -b ${bfile} -h ${psyco} -l ${libcomp} -c ${complevel} ${shufflef} -w -n ${nrows} data.nobackup/bench-${libcomp}-${nrows}k.h5" + echo ${cmd} + ${cmd} +} + +readdata () { + nrows=$1 + bfile=$2 + worst=$3 + psyco=$4 + smode=$5 + + if [ "$smode" = "indexed" ]; then + #repeats=100 + repeats=20 + else + repeats=2 + fi + cmd="${python} search-bench.py -R ${worst} -h -b ${bfile} ${psyco} -m ${smode} -r -k ${repeats} data.nobackup/bench-${libcomp}-${nrows}k.h5" + echo ${cmd} + ${cmd} + return +} + +overwrite=0 +if [ $# > 1 ]; then + if [ "$1" = "-o" ]; then + overwrite=1 + fi +fi +if [ $# > 2 ]; then + psyco=$2 +fi + +# Configuration for testing +#nrowslist="50000" +#nrowslistworst="50000" + +# Normal test +#nrowslist="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000" +#nrowslistworst="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000" +nrowslist="1 2 5 10 20 50 100 200 500 1000" +nrowslistworst="1 2 5 10 20 50 100 200 500 1000" +#nrowslist="1 2 5 10" +#nrowslistworst="1 2 5 10" + +# The next can be regarded as parameters +shuffle=1 + +for libcomp in none zlib lzo; do +#for libcomp in none lzo; do + if [ "$libcomp" = "none" ]; then + complevel=0 + else + complevel=1 + fi + # The name of the data bench file + bfile="worst-dbench-cl-${libcomp}-c${complevel}-S${shuffle}.h5" + + # Move out a possible previous benchmark file + bn=`basename $bfile ".h5"` + mv -f ${bn}-bck2.h5 ${bn}-bck3.h5 + mv -f ${bn}-bck.h5 ${bn}-bck2.h5 + if [ "$overwrite" = "1" ]; then + echo "moving ${bn}.h5 to ${bn}-bck.h5" + mv -f ${bn}.h5 ${bn}-bck.h5 + else + echo "copying ${bn}.h5 to ${bn}-bck.h5" + cp -f ${bn}.h5 ${bn}-bck.h5 + fi + for worst in "" -t; do + #for worst in ""; do + # Write data files + if [ "$worst" = "-t" ]; then + echo + echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo "Entering worst case..." 
+ echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo + nrowslist=$nrowslistworst + fi + # Write data file + for nrows in $nrowslist; do + echo "*************************************************************" + echo "Writing for nrows=$nrows Krows, psyco=$psyco, worst='${worst}'" + echo "*************************************************************" + writedata ${nrows} ${bfile} "${worst}" "${psyco}" + done + # Read data files + for smode in indexed inkernel standard; do + ${python} cacheout.py + for nrows in $nrowslist; do + echo "***********************************************************" + echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco, worst='${worst}'" + echo "***********************************************************" + readdata ${nrows} ${bfile} "${worst}" "${psyco}" "${smode}" + done + done + # Finally, after the final search, delete the source (if desired) +# for nrows in $nrowslist; do +# rm -f data.nobackup/bench-${libcomp}-${nrows}k.h5 +# done + done + echo "New data available on: $bfile" +done + +exit 0 diff --git a/bench/search-bench.py b/bench/search-bench.py new file mode 100644 index 0000000..fd81af1 --- /dev/null +++ b/bench/search-bench.py @@ -0,0 +1,555 @@ +#!/usr/bin/env python + +import sys +import random +import warnings +from time import perf_counter as clock +from time import process_time as cpuclock +from pathlib import Path + +import numpy as np + +import tables as tb + +# Initialize the random generator always with the same integer +# in order to have reproductible results +random.seed(19) +np.random.seed(19) + +randomvalues = 0 +worst = 0 + +Small = { + "var1": tb.StringCol(itemsize=4, dflt="Hi!", pos=2), + "var2": tb.Int32Col(pos=1), + "var3": tb.Float64Col(pos=0), + # "var4" : BoolCol(), +} + + +def create_new_bench_file(bfile, verbose): + + class Create(tb.IsDescription): + nrows = tb.Int32Col(pos=0) + irows = tb.Int32Col(pos=1) + tfill = tb.Float64Col(pos=2) + tidx = tb.Float64Col(pos=3) + tcfill = tb.Float64Col(pos=4) + tcidx = tb.Float64Col(pos=5) + rowsecf = tb.Float64Col(pos=6) + rowseci = tb.Float64Col(pos=7) + fsize = tb.Float64Col(pos=8) + isize = tb.Float64Col(pos=9) + psyco = tb.BoolCol(pos=10) + + class Search(tb.IsDescription): + nrows = tb.Int32Col(pos=0) + rowsel = tb.Int32Col(pos=1) + time1 = tb.Float64Col(pos=2) + time2 = tb.Float64Col(pos=3) + tcpu1 = tb.Float64Col(pos=4) + tcpu2 = tb.Float64Col(pos=5) + rowsec1 = tb.Float64Col(pos=6) + rowsec2 = tb.Float64Col(pos=7) + psyco = tb.BoolCol(pos=8) + + if verbose: + print("Creating a new benchfile:", bfile) + # Open the benchmarking file + bf = tb.open_file(bfile, "w") + # Create groups + for recsize in ["small"]: + group = bf.create_group("/", recsize, recsize + " Group") + # Attach the row size of table as attribute + if recsize == "small": + group._v_attrs.rowsize = 16 + # Create a Table for writing bench + bf.create_table(group, "create_best", Create, "best case") + bf.create_table(group, "create_worst", Create, "worst case") + for case in ["best", "worst"]: + # create a group for searching bench (best case) + group_s = bf.create_group(group, "search_" + case, "Search Group") + # Create Tables for searching + for mode in ["indexed", "inkernel", "standard"]: + group_m = bf.create_group(group_s, mode, mode + " Group") + # for searching bench + # for atom in ["string", "int", "float", "bool"]: + for atom in ["string", "int", "float"]: + bf.create_table(group_m, atom, Search, atom + " bench") + bf.close() + + +def create_file(filename, nrows, filters, 
index, heavy, noise, verbose): + + # Open a file in "w"rite mode + fileh = tb.open_file( + filename, mode="w", title="Searchsorted Benchmark", filters=filters + ) + rowswritten = 0 + + # Create the test table + table = fileh.create_table( + fileh.root, "table", Small, "test table", None, nrows + ) + + t1 = clock() + cpu1 = cpuclock() + nrowsbuf = table.nrowsinbuf + minimum = 0 + maximum = nrows + for i in range(0, nrows, nrowsbuf): + if i + nrowsbuf > nrows: + j = nrows + else: + j = i + nrowsbuf + if randomvalues: + var3 = np.random.uniform(minimum, maximum, size=j - i) + else: + var3 = np.arange(i, j, dtype=np.float64) + if noise > 0: + var3 += np.random.uniform(-noise, noise, size=j - i) + var2 = np.array(var3, dtype=np.int32) + var1 = np.empty(shape=[j - i], dtype="S4") + if not heavy: + var1[:] = var2 + table.append([var3, var2, var1]) + table.flush() + rowswritten += nrows + time1 = clock() - t1 + tcpu1 = cpuclock() - cpu1 + print( + f"Time for filling: {time1:.3f} Krows/s: {nrows / 1000 / time1:.3f}", + end=" ", + ) + fileh.close() + size1 = Path(filename).stat().st_size + print(f", File size: {size1 / 1024 / 1024:.3f} MB") + fileh = tb.open_file( + filename, mode="a", title="Searchsorted Benchmark", filters=filters + ) + table = fileh.root.table + rowsize = table.rowsize + if index: + t1 = clock() + cpu1 = cpuclock() + # Index all entries + if not heavy: + indexrows = table.cols.var1.create_index(filters=filters) + for colname in ["var2", "var3"]: + table.colinstances[colname].create_index(filters=filters) + time2 = clock() - t1 + tcpu2 = cpuclock() - cpu1 + print( + f"Time for indexing: {time2:.3f} " + f"iKrows/s: {indexrows / 1000 / time2:.3f}", + end=" ", + ) + else: + indexrows = 0 + time2 = 0.000_000_000_1 # an ugly hack + tcpu2 = 0 + + if verbose: + if index: + idx = table.cols.var1.index + print("Index parameters:", repr(idx)) + else: + print("NOT indexing rows") + # Close the file + fileh.close() + + size2 = Path(filename).stat().st_size - size1 + if index: + print(f", Index size: {size2 / 1024 / 1024:.3f} MB") + return ( + rowswritten, + indexrows, + rowsize, + time1, + time2, + tcpu1, + tcpu2, + size1, + size2, + ) + + +def bench_create( + file, nrows, filters, index, bfile, heavy, psyco, noise, verbose +): + + # Open the benchfile in append mode + bf = tb.open_file(bfile, "a") + recsize = "small" + if worst: + table = bf.get_node("/" + recsize + "/create_worst") + else: + table = bf.get_node("/" + recsize + "/create_best") + + (rowsw, irows, rowsz, time1, time2, tcpu1, tcpu2, size1, size2) = ( + create_file(file, nrows, filters, index, heavy, noise, verbose) + ) + # Collect data + table.row["nrows"] = rowsw + table.row["irows"] = irows + table.row["tfill"] = time1 + table.row["tidx"] = time2 + table.row["tcfill"] = tcpu1 + table.row["tcidx"] = tcpu2 + table.row["fsize"] = size1 + table.row["isize"] = size2 + table.row["psyco"] = psyco + print(f"Rows written: {rowsw} Row size: {rowsz}") + print( + f"Time writing rows: {time1} s (real) " + f"{tcpu1} s (cpu) {tcpu1 / time1:.0%}" + ) + rowsecf = rowsw / time1 + table.row["rowsecf"] = rowsecf + print(f"Total file size: {(size1 + size2) / 1024 / 1024:.3f} MB", end=" ") + print(f", Write KB/s (pure data): {rowsw * rowsz / (time1 * 1024):.0f}") + print(f"Rows indexed: {irows} (IMRows): {irows / 10 ** 6}") + print( + f"Time indexing rows: {time2:.3f} s (real) " + f"{tcpu2:.3f} s (cpu) {tcpu2 / time2:.0%}" + ) + rowseci = irows / time2 + table.row["rowseci"] = rowseci + table.row.append() + bf.close() + + +def read_file(filename, 
atom, riter, indexmode, dselect, verbose): + # Open the HDF5 file in read-only mode + + fileh = tb.open_file(filename, mode="r") + table = fileh.root.table + _ = table.cols.var1 + var2 = table.cols.var2 + _ = table.cols.var3 + if indexmode == "indexed": + if var2.index.nelements > 0: + where = table._whereIndexed + else: + warnings.warn( + "Not indexed table or empty index. Defaulting to in-kernel " + "selection" + ) + indexmode = "inkernel" + where = table._whereInRange + elif indexmode == "inkernel": + where = table.where + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + if indexmode == "indexed": + print("Chunk size:", var2.index.sorted.chunksize) + print("Number of elements per slice:", var2.index.nelemslice) + print("Slice number in", table._v_pathname, ":", var2.index.nrows) + + # table.nrowsinbuf = 10 + # print "nrowsinbuf-->", table.nrowsinbuf + rowselected = 0 + time2 = 0 + tcpu2 = 0 + results = [] + print("Select mode:", indexmode, ". Selecting for type:", atom) + # Initialize the random generator always with the same integer + # in order to have reproductible results on each read iteration + random.seed(19) + np.random.seed(19) + for i in range(riter): + # The interval for look values at. This is aproximately equivalent to + # the number of elements to select + rnd = np.random.randint(table.nrows) + cpu1 = cpuclock() + t1 = clock() + if atom == "string": + val = str(rnd)[-4:] + if indexmode in ["indexed", "inkernel"]: + results = [p.nrow for p in where("var1 == val")] + else: + results = [p.nrow for p in table if p["var1"] == val] + elif atom == "int": + val = rnd + dselect + if indexmode in ["indexed", "inkernel"]: + results = [ + p.nrow for p in where("(rnd <= var3) & (var3 < val)") + ] + else: + results = [p.nrow for p in table if rnd <= p["var2"] < val] + elif atom == "float": + val = rnd + dselect + if indexmode in ["indexed", "inkernel"]: + t1 = clock() + results = [ + p.nrow for p in where("(rnd <= var3) & (var3 < val)") + ] + else: + results = [ + p.nrow + for p in table + if float(rnd) <= p["var3"] < float(val) + ] + else: + raise ValueError("Value for atom '%s' not supported." 
% atom) + rowselected += len(results) + # print "selected values-->", results + if i == 0: + # First iteration + time1 = clock() - t1 + tcpu1 = cpuclock() - cpu1 + else: + if indexmode == "indexed": + # if indexed, wait until the 5th iteration (in order to + # insure that the index is effectively cached) to take times + if i >= 5: + time2 += clock() - t1 + tcpu2 += cpuclock() - cpu1 + else: + time2 += clock() - t1 + tcpu2 += cpuclock() - cpu1 + + if riter > 1: + if indexmode == "indexed" and riter >= 5: + correction = 5 + else: + correction = 1 + time2 = time2 / (riter - correction) + tcpu2 = tcpu2 / (riter - correction) + if verbose and 1: + print("Values that fullfill the conditions:") + print(results) + + # rowsread = table.nrows * riter + rowsread = table.nrows + rowsize = table.rowsize + + # Close the file + fileh.close() + + return (rowsread, rowselected, rowsize, time1, time2, tcpu1, tcpu2) + + +def bench_search( + file, riter, indexmode, bfile, heavy, psyco, dselect, verbose +): + + # Open the benchfile in append mode + bf = tb.open_file(bfile, "a") + recsize = "small" + if worst: + tableparent = "/" + recsize + "/search_worst/" + indexmode + "/" + else: + tableparent = "/" + recsize + "/search_best/" + indexmode + "/" + + # Do the benchmarks + if not heavy: + # atomlist = ["string", "int", "float", "bool"] + atomlist = ["string", "int", "float"] + else: + # atomlist = ["int", "float", "bool"] + atomlist = ["int", "float"] + for atom in atomlist: + tablepath = tableparent + atom + table = bf.get_node(tablepath) + (rowsr, rowsel, rowssz, time1, time2, tcpu1, tcpu2) = read_file( + file, atom, riter, indexmode, dselect, verbose + ) + row = table.row + row["nrows"] = rowsr + row["rowsel"] = rowsel + treadrows = time1 + row["time1"] = time1 + treadrows2 = time2 + row["time2"] = time2 + cpureadrows = tcpu1 + row["tcpu1"] = tcpu1 + cpureadrows2 = tcpu2 + row["tcpu2"] = tcpu2 + row["psyco"] = psyco + tratio = cpureadrows / treadrows + tratio2 = cpureadrows2 / treadrows2 if riter > 1 else 0.0 + t_m_rows = rowsr / (1000 * 1000.0) + s_k_rows = rowsel / 1000.0 + if atom == "string": # just to print once + print(f"Rows read: {rowsr} Mread: {t_m_rows:.6f} Mrows") + print(f"Rows selected: {rowsel} Ksel: {s_k_rows:.6f} Krows") + print( + f"Time selecting (1st time): {treadrows:.6f} s " + f"(real) {cpureadrows:.6f} s (cpu) {tratio:.0%}" + ) + if riter > 1: + print( + f"Time selecting (cached): {treadrows2:.6f} s " + f"(real) {cpureadrows2:.6f} s (cpu) {tratio2:.0%}" + ) + rowsec1 = rowsr / treadrows + row["rowsec1"] = rowsec1 + print(f"Read Mrows/sec: {rowsec1 / 10 ** 6:.6f} (first time)", end=" ") + if riter > 1: + rowsec2 = rowsr / treadrows2 + row["rowsec2"] = rowsec2 + print(f"{rowsec2 / 10 ** 6:.6f} (cache time)") + else: + print() + # Append the info to the table + row.append() + table.flush() + # Close the benchmark file + bf.close() + + +if __name__ == "__main__": + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = ( + """usage: %s [-v] [-p] [-R] [-r] [-w] [-c level] [-l complib] [-S] [-F] [-n nrows] [-x] [-b file] [-t] [-h] [-k riter] [-m indexmode] [-N range] [-d range] datafile + -v verbose + -p use "psyco" if available + -R use Random values for filling + -r only read test + -w only write test + -c sets a compression level (do not set it or 0 for no compression) + -l sets the compression library ("zlib", "lzo", "ucl", "bzip2" or "none") + -S activate shuffling filter + -F activate fletcher32 filter + -n set the number of 
rows in tables (in krows) + -x don't make indexes + -b bench filename + -t worsT searching case + -h heavy benchmark (operations without strings) + -m index mode for reading ("indexed" | "inkernel" | "standard") + -N introduce (uniform) noise within range into the values + -d the interval for look values (int, float) at. Default is 3. + -k number of iterations for reading\n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt( + sys.argv[1:], "vpSFRrowxthk:b:c:l:n:m:N:d:" + ) + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + dselect = 3 + noise = 0 + verbose = 0 + field_name = None + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + shuffle = 0 + fletcher32 = 0 + complib = "zlib" + nrows = 1000 + index = 1 + heavy = 0 + bfile = "bench.h5" + supported_imodes = ["indexed", "inkernel", "standard"] + indexmode = "inkernel" + riter = 1 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + if option[0] == "-p": + usepsyco = 1 + if option[0] == "-R": + randomvalues = 1 + if option[0] == "-S": + shuffle = 1 + if option[0] == "-F": + fletcher32 = 1 + elif option[0] == "-r": + testwrite = 0 + elif option[0] == "-w": + testread = 0 + elif option[0] == "-x": + index = 0 + elif option[0] == "-h": + heavy = 1 + elif option[0] == "-t": + worst = 1 + elif option[0] == "-b": + bfile = option[1] + elif option[0] == "-c": + complevel = int(option[1]) + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-m": + indexmode = option[1] + if indexmode not in supported_imodes: + raise ValueError( + "Indexmode should be any of '%s' and you passed '%s'" + % (supported_imodes, indexmode) + ) + elif option[0] == "-n": + nrows = int(float(option[1]) * 1000) + elif option[0] == "-N": + noise = float(option[1]) + elif option[0] == "-d": + dselect = float(option[1]) + elif option[0] == "-k": + riter = int(option[1]) + + if worst: + nrows -= 1 # the worst case + + if complib == "none": + # This means no compression at all + complib = "zlib" # just to make PyTables not complaining + complevel = 0 + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + # Build the Filters instance + filters = tb.Filters( + complevel=complevel, + complib=complib, + shuffle=shuffle, + fletcher32=fletcher32, + ) + + # Create the benchfile (if needed) + if not Path(bfile).exists(): + create_new_bench_file(bfile, verbose) + + if testwrite: + if verbose: + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if shuffle: + print("Suffling...") + if psyco_imported and usepsyco: + psyco.bind(create_file) + bench_create( + file, nrows, filters, index, bfile, heavy, usepsyco, noise, verbose + ) + if testread: + if psyco_imported and usepsyco: + psyco.bind(read_file) + bench_search( + file, riter, indexmode, bfile, heavy, usepsyco, dselect, verbose + ) diff --git a/bench/search-bench.sh b/bench/search-bench.sh new file mode 100755 index 0000000..402bd77 --- /dev/null +++ b/bench/search-bench.sh @@ -0,0 +1,123 @@ +#!/bin/sh +python="python2.5 -O" + +writedata () { + nrows=$1 + bfile=$2 + heavy=$3 + psyco=$4 + if [ "$shuffle" = "1" ]; then + shufflef="-S" + else + shufflef="" + fi + cmd="${python} search-bench.py -b ${bfile} ${heavy} ${psyco} -l ${libcomp} -c ${complevel} ${shufflef} -w -n ${nrows} -x data.nobackup/bench-${libcomp}-${nrows}k.h5" + echo ${cmd} + ${cmd} +} + +readdata () 
{ + nrows=$1 + bfile=$2 + heavy=$3 + psyco=$4 + smode=$5 + + if [ "$smode" = "indexed" ]; then + repeats=100 + else + repeats=2 + fi + if [ "$heavy" = "-h" -a "$smode" = "standard" ]; then + # For heavy mode don't do a standard search + echo "Skipping the standard search for heavy mode" + else + cmd="${python} search-bench.py -b ${bfile} ${heavy} ${psyco} -m ${smode} -r -k ${repeats} data.nobackup/bench-${libcomp}-${nrows}k.h5" + echo ${cmd} + ${cmd} + fi + if [ "$smode" = "standard" -a "1" = "0" ]; then + # Finally, after the final search, delete the source (if desired) + rm -f data.nobackup/bench-${libcomp}-${nrows}k.h5 + fi + return +} + +overwrite=0 +if [ $# > 1 ]; then + if [ "$1" = "-o" ]; then + overwrite=1 + fi +fi +if [ $# > 2 ]; then + psyco=$2 +fi +# The next can be regarded as parameters +libcomp="lzo" +complevel=1 +shuffle=1 + +# The name of the data bench file +bfile="dbench-cl-${libcomp}-c${complevel}-S${shuffle}.h5" + +# Move out a possible previous benchmark file +bn=`basename $bfile ".h5"` +mv -f ${bn}-bck2.h5 ${bn}-bck3.h5 +mv -f ${bn}-bck.h5 ${bn}-bck2.h5 +if [ "$overwrite" = "1" ]; then + echo "moving ${bn}.h5 to ${bn}-bck.h5" + mv -f ${bn}.h5 ${bn}-bck.h5 +else + echo "copying ${bn}.h5 to ${bn}-bck.h5" + cp -f ${bn}.h5 ${bn}-bck.h5 +fi + +# Configuration for testing +nrowslist="1 2" +nrowslistheavy="5 10" +# This config takes 10 minutes to complete (psyco, zlib) +#nrowslist="1 2 5 10 20 50 100 200 500 1000" +#nrowslistheavy="2000 5000 10000" +#nrowslist="" +#nrowslistheavy="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000 100000" + +# Normal test +#nrowslist="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000" +#nrowslistheavy="20000 50000 100000 200000 500000 1000000" +# Big test +#nrowslist="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000" +#nrowslistheavy="20000 50000 100000 200000 500000 1000000 2000000 5000000" + +for heavy in "" -h; do + # Write data files (light mode) + if [ "$heavy" = "-h" ]; then + echo + echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo "Entering heavy mode..." 
+ echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo + nrowslist=$nrowslistheavy + fi + # Write data file + for nrows in $nrowslist; do + echo "*************************************************************" + echo "Writing for nrows=$nrows Krows, psyco=$psyco, heavy='${heavy}'" + echo "*************************************************************" + writedata ${nrows} ${bfile} "${heavy}" "${psyco}" + done + # Read data files + #for smode in indexed inkernel standard; do + for smode in inkernel standard; do +# for smode in indexed; do + ${python} cacheout.py + for nrows in $nrowslist; do + echo "***********************************************************" + echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco, heavy='${heavy}'" + echo "***********************************************************" + readdata ${nrows} ${bfile} "${heavy}" "${psyco}" "${smode}" + done + done +done + +echo "New data available on: $bfile" +exit 0 diff --git a/bench/searchsorted-bench.py b/bench/searchsorted-bench.py new file mode 100644 index 0000000..84b5761 --- /dev/null +++ b/bench/searchsorted-bench.py @@ -0,0 +1,362 @@ +#!/usr/bin/env python + +from time import perf_counter as clock +from time import process_time as cpuclock + +import tables as tb + + +class Small(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) + var2 = tb.Int32Col() + var3 = tb.Float64Col() + var4 = tb.BoolCol() + + +# Define a user record to characterize some kind of particles + + +class Medium(tb.IsDescription): + var1 = tb.StringCol(itemsize=16) # 16-character String + # float1 = Float64Col(dflt=2.3) + # float2 = Float64Col(dflt=2.3) + # zADCcount = Int16Col() # signed short integer + var2 = tb.Int32Col() # signed short integer + var3 = tb.Float64Col() + grid_i = tb.Int32Col() # integer + grid_j = tb.Int32Col() # integer + pressure = tb.Float32Col() # float (single-precision) + energy = tb.Float64Col(shape=2) # double (double-precision) + + +def create_file(filename, nrows, filters, atom, recsize, index, verbose): + + # Open a file in "w"rite mode + fileh = tb.open_file( + filename, mode="w", title="Searchsorted Benchmark", filters=filters + ) + title = "This is the IndexArray title" + # Create an IndexArray instance + rowswritten = 0 + # Create an entry + klass = {"small": Small, "medium": Medium} + table = fileh.create_table( + fileh.root, "table", klass[recsize], title, None, nrows + ) + for i in range(nrows): + # table.row['var1'] = str(i) + # table.row['var2'] = random.randrange(nrows) + table.row["var2"] = i + table.row["var3"] = i + # table.row['var4'] = i % 2 + # table.row['var4'] = i > 2 + table.row.append() + rowswritten += nrows + table.flush() + rowsize = table.rowsize + indexrows = 0 + + # Index one entry: + if index: + if atom == "string": + indexrows = table.cols.var1.create_index() + elif atom == "bool": + indexrows = table.cols.var4.create_index() + elif atom == "int": + indexrows = table.cols.var2.create_index() + elif atom == "float": + indexrows = table.cols.var3.create_index() + else: + raise ValueError("Index type not supported yet") + if verbose: + print("Number of indexed rows:", indexrows) + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowswritten, rowsize) + + +def read_file(filename, atom, niter, verbose): + # Open the HDF5 file in read-only mode + + fileh = tb.open_file(filename, mode="r") + table = fileh.root.table + print("reading", table) + if atom == "string": + idxcol = table.cols.var1.index + elif atom == "bool": + idxcol = 
table.cols.var4.index + elif atom == "int": + idxcol = table.cols.var2.index + else: + idxcol = table.cols.var3.index + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + print("Chunk size:", idxcol.sorted.chunksize) + print("Number of elements per slice:", idxcol.nelemslice) + print("Slice number in", table._v_pathname, ":", idxcol.nrows) + + rowselected = 0 + if atom == "string": + for i in range(niter): + # results = [table.row["var3"] for i in table.where(2+i<=table.cols.var2 < 10+i)] + # results = [table.row.nrow() for i in table.where(2<=table.cols.var2 < 10)] + results = [ + p["var1"] + for p in table.where(table.cols.var1 == "1111") # p.nrow() + ] + # for p in table.where("1000"<=table.cols.var1<="1010")] + rowselected += len(results) + elif atom == "bool": + for i in range(niter): + results = [ + p["var2"] for p in table.where(table.cols.var4 == 0) + ] # p.nrow() + rowselected += len(results) + elif atom == "int": + for i in range(niter): + # results = [table.row["var3"] for i in table.where(2+i<=table.cols.var2 < 10+i)] + # results = [table.row.nrow() for i in table.where(2<=table.cols.var2 < 10)] + results = [ + p["var2"] # p.nrow() + # for p in table.where(110*i<=table.cols.var2<110*(i+1))] + # for p in table.where(1000-30", positions) + print("Total iterations in search:", niter) + + rowsread += table.nrows + uncompr_bytes += idxcol.sorted.chunksize * niter * idxcol.sorted.itemsize + + results = table.read(coords=positions) + print("results length:", len(results)) + if verbose: + print("Values that fullfill the conditions:") + print(results) + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, uncompr_bytes, niter) + + +if __name__ == "__main__": + import sys + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = ( + """usage: %s [-v] [-p] [-R range] [-r] [-w] [-s recsize ] [-a + atom] [-c level] [-l complib] [-S] [-F] [-i item] [-n nrows] [-x] + [-k niter] file + -v verbose + -p use "psyco" if available + -R select a range in a field in the form "start,stop,step" + -r only read test + -w only write test + -s record size + -a use [float], [int], [bool] or [string] atom + -c sets a compression level (do not set it or 0 for no compression) + -S activate shuffling filter + -F activate fletcher32 filter + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -i item to search + -n set the number of rows in tables + -x don't make indexes + -k number of iterations for reading\n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpSFR:rwxk:s:a:c:l:i:n:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + rng = None + item = None + atom = "int" + field_name = None + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + shuffle = 0 + fletcher32 = 0 + complib = "zlib" + nrows = 100 + recsize = "small" + index = 1 + niter = 1 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + if option[0] == "-p": + usepsyco = 1 + if option[0] == "-S": + shuffle = 1 + if option[0] == "-F": + fletcher32 = 1 + elif option[0] == "-R": + rng = [int(i) for i in option[1].split(",")] + elif option[0] == "-r": 
+ testwrite = 0 + elif option[0] == "-w": + testread = 0 + elif option[0] == "-x": + index = 0 + elif option[0] == "-s": + recsize = option[1] + elif option[0] == "-a": + atom = option[1] + if atom not in ["float", "int", "bool", "string"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == "-c": + complevel = int(option[1]) + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-i": + item = eval(option[1]) + elif option[0] == "-n": + nrows = int(option[1]) + elif option[0] == "-k": + niter = int(option[1]) + + # Build the Filters instance + filters = tb.Filters( + complevel=complevel, + complib=complib, + shuffle=shuffle, + fletcher32=fletcher32, + ) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + if testwrite: + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if shuffle: + print("Suffling...") + t1 = clock() + cpu1 = cpuclock() + if psyco_imported and usepsyco: + psyco.bind(create_file) + (rowsw, rowsz) = create_file( + file, nrows, filters, atom, recsize, index, verbose + ) + t2 = clock() + cpu2 = cpuclock() + tapprows = t2 - t1 + cpuapprows = cpu2 - cpu1 + print(f"Rows written: {rowsw} Row size: {rowsz}") + print( + f"Time writing rows: {tapprows:.3f} s (real) " + f"{cpuapprows:.3f} s (cpu) {cpuapprows / tapprows:.0%}" + ) + print(f"Write rows/sec: {rowsw / tapprows:.0f}") + print(f"Write KB/s : {rowsw * rowsz / tapprows / 1024:.0f}") + + if testread: + if psyco_imported and usepsyco: + psyco.bind(read_file) + psyco.bind(search_file) + t1 = clock() + cpu1 = cpuclock() + if rng or item: + (rowsr, uncompr_b, niter) = search_file(file, atom, verbose, item) + else: + for i in range(1): + (rowsr, rowsel, rowsz) = read_file(file, atom, niter, verbose) + t2 = clock() + cpu2 = cpuclock() + treadrows = t2 - t1 + cpureadrows = cpu2 - cpu1 + t_m_rows = rowsr / 1000 / 1000 + s_k_rows = rowsel / 1000 + print(f"Rows read: {rowsr} Mread: {t_m_rows:.3f} Mrows") + print(f"Rows selected: {rowsel} Ksel: {s_k_rows:.3f} Krows") + print( + f"Time reading rows: {treadrows:.3f} s (real) " + f"{cpureadrows:.3f} s (cpu) {cpureadrows / treadrows:.0%}" + ) + print(f"Read Mrows/sec: {t_m_rows / treadrows:.3f}") + # print "Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)) +# print "Uncompr MB :", int(uncomprB / (1024 * 1024)) +# print "Uncompr MB/s :", int(uncomprB / (treadrows * 1024 * 1024)) +# print "Total chunks uncompr :", int(niter) diff --git a/bench/searchsorted-bench2.py b/bench/searchsorted-bench2.py new file mode 100644 index 0000000..14f35c0 --- /dev/null +++ b/bench/searchsorted-bench2.py @@ -0,0 +1,362 @@ +#!/usr/bin/env python + +from time import perf_counter as clock +from time import process_time as cpuclock + +import tables as tb + + +class Small(tb.IsDescription): + var1 = tb.StringCol(itemsize=4) + var2 = tb.Int32Col() + var3 = tb.Float64Col() + var4 = tb.BoolCol() + + +# Define a user record to characterize some kind of particles + + +class Medium(tb.IsDescription): + var1 = tb.StringCol(itemsize=16, dflt="") # 16-character String + # float1 = Float64Col(dflt=2.3) + # float2 = Float64Col(dflt=2.3) + # zADCcount = Int16Col() # signed short integer + var2 = tb.Int32Col() # signed short integer + var3 = tb.Float64Col() + grid_i = tb.Int32Col() # integer + grid_j = tb.Int32Col() # integer + pressure = tb.Float32Col() # float (single-precision) + energy = tb.Float64Col(shape=2) # double (double-precision) + + +def create_file(filename, nrows, filters, atom, recsize, index, verbose): + + # 
Open a file in "w"rite mode + fileh = tb.open_file( + filename, mode="w", title="Searchsorted Benchmark", filters=filters + ) + title = "This is the IndexArray title" + # Create an IndexArray instance + rowswritten = 0 + # Create an entry + klass = {"small": Small, "medium": Medium} + table = fileh.create_table( + fileh.root, "table", klass[recsize], title, None, nrows + ) + for i in range(nrows): + # table.row['var1'] = str(i) + # table.row['var2'] = random.randrange(nrows) + table.row["var2"] = i + table.row["var3"] = i + # table.row['var4'] = i % 2 + table.row["var4"] = i > 2 + table.row.append() + rowswritten += nrows + table.flush() + rowsize = table.rowsize + indexrows = 0 + + # Index one entry: + if index: + if atom == "string": + indexrows = table.cols.var1.create_index() + elif atom == "bool": + indexrows = table.cols.var4.create_index() + elif atom == "int": + indexrows = table.cols.var2.create_index() + elif atom == "float": + indexrows = table.cols.var3.create_index() + else: + raise ValueError("Index type not supported yet") + if verbose: + print("Number of indexed rows:", indexrows) + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowswritten, rowsize) + + +def read_file(filename, atom, niter, verbose): + # Open the HDF5 file in read-only mode + + fileh = tb.open_file(filename, mode="r") + table = fileh.root.table + print("reading", table) + if atom == "string": + idxcol = table.cols.var1.index + elif atom == "bool": + idxcol = table.cols.var4.index + elif atom == "int": + idxcol = table.cols.var2.index + else: + idxcol = table.cols.var3.index + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + print("Chunk size:", idxcol.sorted.chunksize) + print("Number of elements per slice:", idxcol.nelemslice) + print("Slice number in", table._v_pathname, ":", idxcol.nrows) + + rowselected = 0 + if atom == "string": + for i in range(niter): + # results = [table.row["var3"] for i in table(where=2+i<=table.cols.var2 < 10+i)] + # results = [table.row.nrow() for i in table(where=2<=table.cols.var2 < 10)] + results = [ + p["var1"] + for p in table(where=table.cols.var1 == "1111") # p.nrow() + ] + # for p in table(where="1000"<=table.cols.var1<="1010")] + rowselected += len(results) + elif atom == "bool": + for i in range(niter): + results = [ + p["var2"] for p in table(where=table.cols.var4 == 0) + ] # p.nrow() + rowselected += len(results) + elif atom == "int": + for i in range(niter): + # results = [table.row["var3"] for i in table(where=2+i<=table.cols.var2 < 10+i)] + # results = [table.row.nrow() for i in table(where=2<=table.cols.var2 < 10)] + results = [ + p["var2"] # p.nrow() + # for p in table(where=110*i<=table.cols.var2<110*(i+1))] + # for p in table(where=1000-30", positions) + print("Total iterations in search:", niter) + + rowsread += table.nrows + uncompr_bytes += idxcol.sorted.chunksize * niter * idxcol.sorted.itemsize + + results = table.read(coords=positions) + print("results length:", len(results)) + if verbose: + print("Values that fullfill the conditions:") + print(results) + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, uncompr_bytes, niter) + + +if __name__ == "__main__": + import sys + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = ( + """usage: %s [-v] [-p] 
[-R range] [-r] [-w] [-s recsize ] [-a + atom] [-c level] [-l complib] [-S] [-F] [-i item] [-n nrows] [-x] + [-k niter] file + -v verbose + -p use "psyco" if available + -R select a range in a field in the form "start,stop,step" + -r only read test + -w only write test + -s record size + -a use [float], [int], [bool] or [string] atom + -c sets a compression level (do not set it or 0 for no compression) + -S activate shuffling filter + -F activate fletcher32 filter + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -i item to search + -n set the number of rows in tables + -x don't make indexes + -k number of iterations for reading\n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpSFR:rwxk:s:a:c:l:i:n:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + rng = None + item = None + atom = "int" + field_name = None + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + shuffle = 0 + fletcher32 = 0 + complib = "zlib" + nrows = 100 + recsize = "small" + index = 1 + niter = 1 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + if option[0] == "-p": + usepsyco = 1 + if option[0] == "-S": + shuffle = 1 + if option[0] == "-F": + fletcher32 = 1 + elif option[0] == "-R": + rng = [int(i) for i in option[1].split(",")] + elif option[0] == "-r": + testwrite = 0 + elif option[0] == "-w": + testread = 0 + elif option[0] == "-x": + index = 0 + elif option[0] == "-s": + recsize = option[1] + elif option[0] == "-a": + atom = option[1] + if atom not in ["float", "int", "bool", "string"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == "-c": + complevel = int(option[1]) + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-i": + item = eval(option[1]) + elif option[0] == "-n": + nrows = int(option[1]) + elif option[0] == "-k": + niter = int(option[1]) + + # Build the Filters instance + filters = tb.Filters( + complevel=complevel, + complib=complib, + shuffle=shuffle, + fletcher32=fletcher32, + ) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + if testwrite: + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if shuffle: + print("Suffling...") + t1 = clock() + cpu1 = cpuclock() + if psyco_imported and usepsyco: + psyco.bind(create_file) + (rowsw, rowsz) = create_file( + file, nrows, filters, atom, recsize, index, verbose + ) + t2 = clock() + cpu2 = cpuclock() + tapprows = t2 - t1 + cpuapprows = cpu2 - cpu1 + print(f"Rows written: {rowsw} Row size: {rowsz}") + print( + f"Time writing rows: {tapprows:.3f} s (real) " + f"{cpuapprows:.3f} s (cpu) {cpuapprows / tapprows:.0%}" + ) + print(f"Write rows/sec: {rowsw / tapprows:.0f}") + print(f"Write KB/s : {rowsw * rowsz / (tapprows * 1024):.0f}") + + if testread: + if psyco_imported and usepsyco: + psyco.bind(read_file) + psyco.bind(search_file) + t1 = clock() + cpu1 = cpuclock() + if rng or item: + (rowsr, uncompr_b, niter) = search_file(file, atom, verbose, item) + else: + for i in range(1): + (rowsr, rowsel, rowsz) = read_file(file, atom, niter, verbose) + t2 = clock() + cpu2 = cpuclock() + treadrows = t2 - t1 + cpureadrows = cpu2 - cpu1 + t_m_rows = rowsr / 1000 / 1000 + s_k_rows = rowsel / 1000 + print(f"Rows read: {rowsr} Mread: {t_m_rows:.3f} Mrows") + print(f"Rows selected: {rowsel} Ksel: {s_k_rows:.3f} 
Krows") + print( + f"Time reading rows: {treadrows:.3f} s (real) " + f"{cpureadrows:.3f} s (cpu) {cpureadrows / treadrows:.0%}" + ) + print(f"Read Mrows/sec: {t_m_rows / treadrows:.3f}") + # print "Read KB/s :", int(rowsr * rowsz / (treadrows * 1024)) +# print "Uncompr MB :", int(uncomprB / (1024 * 1024)) +# print "Uncompr MB/s :", int(uncomprB / (treadrows * 1024 * 1024)) +# print "Total chunks uncompr :", int(niter) diff --git a/bench/shelve-bench.py b/bench/shelve-bench.py new file mode 100644 index 0000000..94f079b --- /dev/null +++ b/bench/shelve-bench.py @@ -0,0 +1,214 @@ +#!/usr/bin/env python + +import sys +import shelve +import struct + +import numpy as np +import psyco + +import tables as tb + +# This class is accessible only for the examples + + +class Small(tb.IsDescription): + """Record descriptor. + + A record has several columns. They are represented here as class + attributes, whose names are the column names and their values will + become their types. The IsDescription class will take care the user + will not add any new variables and that its type is correct. + + """ + + var1 = tb.StringCol(itemsize=4) + var2 = tb.Int32Col() + var3 = tb.Float64Col() + + +# Define a user record to characterize some kind of particles + + +class Medium(tb.IsDescription): + name = tb.StringCol(itemsize=16) # 16-character String + float1 = tb.Float64Col(shape=2, dflt=2.3) + # float1 = Float64Col(dflt=1.3) + # float2 = Float64Col(dflt=2.3) + ADCcount = tb.Int16Col() # signed short integer + grid_i = tb.Int32Col() # integer + grid_j = tb.Int32Col() # integer + pressure = tb.Float32Col() # float (single-precision) + energy = tb.Float64Col() # double (double-precision) + + +# Define a user record to characterize some kind of particles + + +class Big(tb.IsDescription): + name = tb.StringCol(itemsize=16) # 16-character String + # float1 = Float64Col(shape=32, dflt=np.arange(32)) + # float2 = Float64Col(shape=32, dflt=np.arange(32)) + float1 = tb.Float64Col(shape=32, dflt=range(32)) + float2 = tb.Float64Col(shape=32, dflt=[2.2] * 32) + ADCcount = tb.Int16Col() # signed short integer + grid_i = tb.Int32Col() # integer + grid_j = tb.Int32Col() # integer + pressure = tb.Float32Col() # float (single-precision) + energy = tb.Float64Col() # double (double-precision) + + +def create_file(filename, totalrows, recsize): + + # Open a 'n'ew file + fileh = shelve.open(filename, flag="n") + + rowswritten = 0 + # Get the record object associated with the new table + if recsize == "big": + d = Big() + arr = np.arange(32, dtype=np.float64) + arr2 = np.arange(32, dtype=np.float64) + elif recsize == "medium": + d = Medium() + else: + d = Small() + # print d + # sys.exit(0) + for j in range(3): + # Create a table + # table = fileh.create_table(group, 'tuple'+str(j), Record(), title, + # compress = 6, expectedrows = totalrows) + # Create a Table instance + tablename = "tuple" + str(j) + table = [] + # Fill the table + if recsize == "big" or recsize == "medium": + for i in range(totalrows): + d.name = "Particle: %6d" % (i) + # d.TDCcount = i % 256 + d.ADCcount = (i * 256) % (1 << 16) + if recsize == "big": + # d.float1 = np.array([i]*32, np.float64) + # d.float2 = np.array([i**2]*32, np.float64) + arr[0] = 1.1 + d.float1 = arr + arr2[0] = 2.2 + d.float2 = arr2 + pass + else: + d.float1 = np.array([i**2] * 2, np.float64) + # d.float1 = float(i) + # d.float2 = float(i) + d.grid_i = i + d.grid_j = 10 - i + d.pressure = float(i * i) + d.energy = float(d.pressure**4) + table.append( + ( + d.ADCcount, + d.energy, + d.float1, + 
d.float2, + d.grid_i, + d.grid_j, + d.name, + d.pressure, + ) + ) + # Only on float case + # table.append((d.ADCcount, d.energy, d.float1, + # d.grid_i, d.grid_j, d.name, d.pressure)) + else: + for i in range(totalrows): + d.var1 = str(i) + d.var2 = i + d.var3 = 12.1e10 + table.append((d.var1, d.var2, d.var3)) + + # Save this table on disk + fileh[tablename] = table + rowswritten += totalrows + + # Close the file + fileh.close() + return (rowswritten, struct.calcsize(d._v_fmt)) + + +def read_file(filename, recsize): + # Open the HDF5 file in read-only mode + fileh = shelve.open(filename, "r") + for table in ["tuple0", "tuple1", "tuple2"]: + if recsize == "big" or recsize == "medium": + e = [t[2] for t in fileh[table] if t[4] < 20] + # if there is only one float (array) + # e = [ t[1] for t in fileh[table] if t[3] < 20 ] + else: + e = [t[1] for t in fileh[table] if t[1] < 20] + + print("resulting selection list ==>", e) + print("Total selected records ==> ", len(e)) + + # Close the file (eventually destroy the extended type) + fileh.close() + + +# Add code to test here +if __name__ == "__main__": + import getopt + from time import perf_counter as clock + + usage = ( + """usage: %s [-f] [-s recsize] [-i iterations] file + -s use [big] record, [medium] or [small] + -i sets the number of rows in each table\n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "s:fi:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + recsize = "medium" + iterations = 100 + + # Get the options + for option in opts: + if option[0] == "-s": + recsize = option[1] + if recsize not in ["big", "medium", "small"]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == "-i": + iterations = int(option[1]) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + t1 = clock() + psyco.bind(create_file) + (rowsw, rowsz) = create_file(file, iterations, recsize) + t2 = clock() + tapprows = t2 - t1 + + t1 = clock() + psyco.bind(read_file) + read_file(file, recsize) + t2 = clock() + treadrows = t2 - t1 + + print(f"Rows written: {rowsw} Row size: {rowsz}") + print(f"Time appending rows: {tapprows:.3f}") + print(f"Write rows/sec: {iterations * 3 / tapprows:.0f}") + print(f"Write KB/s : {rowsw * rowsz / (tapprows * 1024):.0f}") + print(f"Time reading rows: {treadrows:.3f}") + print(f"Read rows/sec: {iterations * 3 / treadrows:.0f}") + print(f"Read KB/s : {rowsw * rowsz / (treadrows * 1024):.0f}") diff --git a/bench/simple-table.py b/bench/simple-table.py new file mode 100644 index 0000000..389c017 --- /dev/null +++ b/bench/simple-table.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +from time import time + +import numpy as np + +import tables as tb + +# Size of the buffer to be appended +M = 1_000_000 +# Number of rows in table +N = 100 * M + +filename = "simple-table-blosc2.h5" +filters = tb.Filters(9, "blosc2", shuffle=True) +# filters = tb.Filters(9, "zlib", shuffle=True) +# dt = np.dtype([('int32', np.int32), ('float32', np.float32, 10)]) +dt = np.dtype([("int32", np.int32), ("float32", np.float32)]) + +a = np.fromiter(((i, i) for i in range(M)), dtype=dt) +# a = np.zeros(M, dtype=dt) + +if 1: + output_file = tb.open_file(filename, mode="w", PYTABLES_SYS_ATTRS=False) + table = output_file.create_table( + "/", "test", dt, filters=filters, expectedrows=N + ) # , chunkshape=131072) + chunkshape = table.chunkshape[0] + print("chunkshape:", 
chunkshape)
+
+    t0 = time()
+    for i in range(0, N, M):
+        table.append(a)
+        # table.append(a[1:-1])
+    table.flush()
+    print(f"Time for storing: {time() - t0 : .3f}s")
+
+    output_file.close()
+
+print("Start reads:")
+
+output_file = tb.open_file(filename)
+table = output_file.root.test
+
+t0 = time()
+result = 0
+# for i in range(0, N, 8192):
+#     result += table[i]['int32']
+# for row in table:
+#     result += row['int32']
+b = table[:]
+print(f"Time for reading: {time() - t0 : .3f}s")
+print("result:", result)
+
+output_file.close()
diff --git a/bench/split-file.py b/bench/split-file.py
new file mode 100644
index 0000000..4914892
--- /dev/null
+++ b/bench/split-file.py
@@ -0,0 +1,39 @@
+"""
+Split out a monolithic file with many different runs of
+indexed_search.py.  The resulting files are meant for use in
+get-figures.py.
+
+Usage: python split-file.py prefix filename
+"""
+
+import sys
+from pathlib import Path
+
+prefix = sys.argv[1]
+filename = sys.argv[2]
+sf = None
+for line in Path(filename).read_text().splitlines():
+    if line.startswith("Processing database:"):
+        if sf:
+            sf.close()
+        line2 = line.split(":")[1]
+        # Check whether the entry is compressed and has to be processed
+        line2 = line2[: line2.rfind(".")]
+        params = line2.split("-")
+        optlevel = 0
+        complib = None
+        for param in params:
+            if param[0] == "O" and param[1].isdigit():
+                optlevel = int(param[1])
+            elif param[:-1] in ("zlib", "lzo"):
+                complib = param
+        if "PyTables" in prefix:
+            if complib:
+                sfilename = f"{prefix}-O{optlevel}-{complib}.out"
+            else:
+                sfilename = f"{prefix}-O{optlevel}.out"
+        else:
+            sfilename = f"{prefix}.out"
+        sf = open(sfilename, "a")
+    if sf:
+        # splitlines() strips the newlines, so add one back when writing
+        sf.write(line + "\n")
diff --git a/bench/sqlite-search-bench-rnd.sh b/bench/sqlite-search-bench-rnd.sh
new file mode 100755
index 0000000..8890680
--- /dev/null
+++ b/bench/sqlite-search-bench-rnd.sh
@@ -0,0 +1,105 @@
+#!/bin/sh
+# I don't know why, but the /usr/bin/python2.3 from Debian is about 30%
+# slower than my own compiled version!  2004-08-18
python="/usr/local/bin/python2.3 -O"
+
+writedata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+    cmd="${python} sqlite-search-bench.py -R -h -b ${bfile} ${psyco} -m ${smode} -w -n ${nrows} data.nobackup/sqlite-${nrows}k.h5"
+    echo ${cmd}
+    ${cmd}
+}
+
+readdata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+
+    if [ "$smode" = "indexed" ]; then
+        #repeats=100
+        repeats=20
+    else
+        repeats=2
+    fi
+    cmd="${python} sqlite-search-bench.py -R -h -b ${bfile} ${psyco} -n ${nrows} -m ${smode} -r -k ${repeats} data.nobackup/sqlite-${nrows}k.h5"
+    echo ${cmd}
+    ${cmd}
+    # Finally, delete the source (if desired)
+    if [ "$smode" = "indexed" ]; then
+        echo "Deleting data file data.nobackup/sqlite-${nrows}k.h5"
+# rm -f data.nobackup/sqlite-${nrows}k.h5
+    fi
+    return
+}
+
+overwrite=0
+# Inside [ ], ">" is a redirection; use -ge for the numeric comparison
+if [ $# -ge 1 ]; then
+    if [ "$1" = "-o" ]; then
+        overwrite=1
+    fi
+fi
+if [ $# -ge 2 ]; then
+    psyco=$2
+fi
+
+# The name of the data bench file
+bfile="sqlite-dbench.h5"
+
+# Move out a possible previous benchmark file
+bn=`basename $bfile ".h5"`
+mv -f ${bn}-bck2.h5 ${bn}-bck3.h5
+mv -f ${bn}-bck.h5 ${bn}-bck2.h5
+if [ "$overwrite" = "1" ]; then
+    echo "moving ${bn}.h5 to ${bn}-bck.h5"
+    mv -f ${bn}.h5 ${bn}-bck.h5
+else
+    echo "copying ${bn}.h5 to ${bn}-bck.h5"
+    cp -f ${bn}.h5 ${bn}-bck.h5
+fi
+
+# Configuration for testing
+nrowsliststd="1 2"
+nrowslistidx="1 2"
+#nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000"
+#nrowsliststd="1 2 5 10 20"
+#nrowslistidx="1 2 5 10 20"
+# nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+# nrowslistidx="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+#nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000 100000"
+#nrowslistidx="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000 100000"
+
+for smode in standard indexed; do
+#for smode in indexed; do
+    echo
+    echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    echo "Entering ${smode} mode..."
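+    # For nrows=1 in indexed mode with psyco unset, the writedata call
+    # below echoes and runs a command line of the form (a worked
+    # expansion of the cmd variable defined above):
+    #
+    #   /usr/local/bin/python2.3 -O sqlite-search-bench.py -R -h \
+    #       -b sqlite-dbench.h5 -m indexed -w -n 1 data.nobackup/sqlite-1k.h5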
+ echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo + if [ "$smode" = "standard" ]; then + nrowslist=$nrowsliststd + else + nrowslist=$nrowslistidx + fi + # Write data files + for nrows in $nrowslist; do + echo "*************************************************************" + echo "Writing for nrows=$nrows Krows, $smode, psyco=$psyco" + echo "*************************************************************" + writedata ${nrows} ${bfile} "${smode}" "${psyco}" + done + # Read data files + ${python} cacheout.py + for nrows in $nrowslist; do + echo "***********************************************************" + echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco" + echo "***********************************************************" + readdata ${nrows} ${bfile} "${smode}" "${psyco}" + done +done + +echo "New data available on: $bfile" +exit 0 diff --git a/bench/sqlite-search-bench.py b/bench/sqlite-search-bench.py new file mode 100644 index 0000000..1fd51cd --- /dev/null +++ b/bench/sqlite-search-bench.py @@ -0,0 +1,466 @@ +#!/usr/bin/python + +import os +import sys +import random +import sqlite3 +from time import perf_counter as clock +from time import process_time as cpuclock +from pathlib import Path + +import numpy as np + +import tables as tb + +randomvalues = 0 +standarddeviation = 10_000 +# Initialize the random generator always with the same integer +# in order to have reproductible results +random.seed(19) +np.random.seed((19, 20)) + +# defaults +psycon = 0 +worst = 0 + + +def create_new_bench_file(bfile, verbose): + + class Create(tb.IsDescription): + nrows = tb.Int32Col(pos=0) + irows = tb.Int32Col(pos=1) + tfill = tb.Float64Col(pos=2) + tidx = tb.Float64Col(pos=3) + tcfill = tb.Float64Col(pos=4) + tcidx = tb.Float64Col(pos=5) + rowsecf = tb.Float64Col(pos=6) + rowseci = tb.Float64Col(pos=7) + fsize = tb.Float64Col(pos=8) + isize = tb.Float64Col(pos=9) + psyco = tb.BoolCol(pos=10) + + class Search(tb.IsDescription): + nrows = tb.Int32Col(pos=0) + rowsel = tb.Int32Col(pos=1) + time1 = tb.Float64Col(pos=2) + time2 = tb.Float64Col(pos=3) + tcpu1 = tb.Float64Col(pos=4) + tcpu2 = tb.Float64Col(pos=5) + rowsec1 = tb.Float64Col(pos=6) + rowsec2 = tb.Float64Col(pos=7) + psyco = tb.BoolCol(pos=8) + + if verbose: + print("Creating a new benchfile:", bfile) + # Open the benchmarking file + bf = tb.open_file(bfile, "w") + # Create groups + for recsize in ["sqlite_small"]: + group = bf.create_group("/", recsize, recsize + " Group") + # Attach the row size of table as attribute + if recsize == "small": + group._v_attrs.rowsize = 16 + # Create a Table for writing bench + bf.create_table(group, "create_indexed", Create, "indexed values") + bf.create_table(group, "create_standard", Create, "standard values") + # create a group for searching bench + group_s = bf.create_group(group, "search", "Search Group") + # Create Tables for searching + for mode in ["indexed", "standard"]: + group = bf.create_group(group_s, mode, mode + " Group") + # for searching bench + # for atom in ["string", "int", "float", "bool"]: + for atom in ["string", "int", "float"]: + bf.create_table(group, atom, Search, atom + " bench") + bf.close() + + +def create_file( + filename, nrows, filters, indexmode, heavy, noise, bfile, verbose +): + + # Initialize some variables + t1 = 0 + t2 = 0 + tcpu1 = 0 + tcpu2 = 0 + rowsecf = 0 + rowseci = 0 + size1 = 0 + size2 = 0 + + if indexmode == "standard": + print("Creating a new database:", dbfile) + instd = os.popen("/usr/local/bin/sqlite " + dbfile, "w") + 
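+        # The DDL below is piped into the sqlite CLI; a minimal sketch of
+        # doing the same through the sqlite3 module instead (using the
+        # CREATESTD string defined just below; "conn"/"cur" are
+        # illustrative names):
+        #
+        #   conn = sqlite3.connect(dbfile)
+        #   cur = conn.cursor()
+        #   cur.executescript(CREATESTD)  # or CREATEIDX to index up front
+        #   conn.commit()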
CREATESTD = """
+CREATE TABLE small (
+-- Name     Type                 -- Example
+---------------------------------------
+recnum  INTEGER PRIMARY KEY,  -- 345
+var1    char(4),              -- Abronia villosa
+var2    INTEGER,              -- 111
+var3    FLOAT                 -- 12.32
+);
+"""  # noqa: N806
+        CREATEIDX = """
+CREATE TABLE small (
+-- Name     Type                 -- Example
+---------------------------------------
+recnum  INTEGER PRIMARY KEY,  -- 345
+var1    char(4),              -- Abronia villosa
+var2    INTEGER,              -- 111
+var3    FLOAT                 -- 12.32
+);
+CREATE INDEX ivar1 ON small(var1);
+CREATE INDEX ivar2 ON small(var2);
+CREATE INDEX ivar3 ON small(var3);
+"""  # noqa: F841,N806
+        # Creating the table first and indexing afterwards is a bit faster
+        instd.write(CREATESTD)
+        instd.close()
+
+    conn = sqlite3.connect(dbfile)
+    cursor = conn.cursor()
+    if indexmode == "standard":
+        # sqlite3 binds parameters through "?" placeholders
+        place_holders = ",".join(["?"] * 3)
+        # Insert rows
+        SQL = f"insert into small values(NULL, {place_holders})"  # noqa: N806
+        time1 = clock()
+        cpu1 = cpuclock()
+        # This way of filling copies the PyTables benchmark
+        nrowsbuf = 1000
+        minimum = 0
+        maximum = nrows
+        for i in range(0, nrows, nrowsbuf):
+            if i + nrowsbuf > nrows:
+                j = nrows
+            else:
+                j = i + nrowsbuf
+            if randomvalues:
+                var3 = np.random.uniform(minimum, maximum, j - i)
+            else:
+                var3 = np.arange(i, j, dtype=np.float64)
+                if noise:
+                    var3 += np.random.uniform(-3, 3, j - i)
+            var2 = np.array(var3, dtype=np.int32)
+            var1 = np.zeros(j - i, dtype="S4")
+            if not heavy:
+                for n in range(j - i):
+                    var1[n] = str("%.4s" % var2[n])
+            for n in range(j - i):
+                # cast to plain Python types so sqlite3 can bind them
+                fields = (var1[n].decode(), int(var2[n]), float(var3[n]))
+                cursor.execute(SQL, fields)
+        conn.commit()
+        t1 = clock() - time1
+        tcpu1 = cpuclock() - cpu1
+        rowsecf = nrows / t1
+        size1 = Path(dbfile).stat().st_size
+        print(f"******** Results for writing nrows = {nrows} *********")
+        print(f"Insert time: {t1:.5f}, KRows/s: {nrows / 1000 / t1:.3f}")
+        print(f", File size: {size1 / 1024 / 1024:.3f} MB")
+
+    # Index them
+    if indexmode == "indexed":
+        time1 = clock()
+        cpu1 = cpuclock()
+        if not heavy:
+            cursor.execute("CREATE INDEX ivar1 ON small(var1)")
+            conn.commit()
+        cursor.execute("CREATE INDEX ivar2 ON small(var2)")
+        conn.commit()
+        cursor.execute("CREATE INDEX ivar3 ON small(var3)")
+        conn.commit()
+        t2 = clock() - time1
+        tcpu2 = cpuclock() - cpu1
+        rowseci = nrows / t2
+        print(f"Index time: {t2:.5f}, IKRows/s: {nrows / 1000 / t2:.3f}")
+        size2 = Path(dbfile).stat().st_size - size1
+        print(f", Final size with index: {size2 / 1024 / 1024:.3f} MB")
+
+    conn.close()
+
+    # Collect benchmark data
+    bf = tb.open_file(bfile, "a")
+    recsize = "sqlite_small"
+    if indexmode == "indexed":
+        table = bf.get_node("/" + recsize + "/create_indexed")
+    else:
+        table = bf.get_node("/" + recsize + "/create_standard")
+    table.row["nrows"] = nrows
+    table.row["irows"] = nrows
+    table.row["tfill"] = t1
+    table.row["tidx"] = t2
+    table.row["tcfill"] = tcpu1
+    table.row["tcidx"] = tcpu2
+    table.row["psyco"] = psycon
+    table.row["rowsecf"] = rowsecf
+    table.row["rowseci"] = rowseci
+    table.row["fsize"] = size1
+    table.row["isize"] = size2
+    table.row.append()
+    bf.close()
+
+    return
+
+
+def read_file(dbfile, nrows, indexmode, heavy, dselect, bfile, riter):
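+    # The range queries below are built with "%" string interpolation;
+    # a minimal sketch of the equivalent parameterized form, which lets
+    # sqlite3 handle quoting (same table and columns as in this file):
+    #
+    #   cursor.execute(
+    #       "select recnum from small where var2 >= ? and var2 < ?",
+    #       (rnd, rnd + dselect),
+    #   )
+    #   results = cursor.fetchall()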
+    # Connect to the database.  sqlite3.connect() takes just the
+    # database path (the old "db="/"mode=" keywords belong to the
+    # pysqlite 1 API).
+    conn = sqlite3.connect(dbfile)
+    # Obtain a cursor
+    cursor = conn.cursor()
+
+    # select count(*), avg(var2)
+    SQL1 = """
+    select recnum
+    from small where var1 = ?
+    """  # noqa: N806
+    SQL2 = """
+    select recnum
+    from small where var2 >= %s and var2 < %s
+    """  # noqa: N806
+    SQL3 = """
+    select recnum
+    from small where var3 >= %s and var3 < %s
+    """  # noqa: N806
+
+    # Open the benchmark database
+    bf = tb.open_file(bfile, "a")
+    # default values for the case that columns are not indexed
+    t2 = 0
+    tcpu2 = 0
+    # Some previous computations for the case of random values
+    if randomvalues:
+        # algorithm to choose a value separated from the mean
+        # If you want to select fewer values, use this:
+        # if nrows/2 > standarddeviation*3:
+        #     Choose five standard deviations away from the mean value
+        #     dev = standarddeviation*5
+        # dev = standarddeviation*math.log10(nrows/1000.)
+
+        # This algorithm produces overly asymmetric result values
+        # if standarddeviation*10 < nrows/2:
+        #     Choose four standard deviations away from the mean value
+        #     dev = standarddeviation*4
+        # else:
+        #     dev = 100
+        # Yet Another Algorithm
+        if nrows / 2 > standarddeviation * 10:
+            dev = standarddeviation * 4
+        elif nrows / 2 > standarddeviation:
+            dev = standarddeviation * 2
+        elif nrows / 2 > standarddeviation / 10:
+            dev = standarddeviation / 10
+        else:
+            dev = standarddeviation / 100
+
+        valmax = round(nrows / 2 - dev)
+        # split the selection range in regular chunks
+        if riter > valmax * 2:
+            riter = valmax * 2
+        chunksize = (valmax * 2 // riter) * 10
+        # Get a list of integers for the intervals
+        randlist = list(range(0, valmax, chunksize))
+        randlist.extend(range(nrows - valmax, nrows, chunksize))
+        # expand the list ten times so as to use the cache
+        randlist = randlist * 10
+        # shuffle the list
+        random.shuffle(randlist)
+        # reset the value of chunksize
+        chunksize = chunksize // 10
+        # print "chunksize-->", chunksize
+        # randlist.sort();print "randlist-->", randlist
+    else:
+        chunksize = 3
+    if heavy:
+        searchmodelist = ["int", "float"]
+    else:
+        searchmodelist = ["string", "int", "float"]
+
+    # Execute queries
+    for atom in searchmodelist:
+        time2 = 0
+        cpu2 = 0
+        rowsel = 0
+        for i in range(riter):
+            rnd = random.randrange(nrows)
+            time1 = clock()
+            cpu1 = cpuclock()
+            if atom == "string":
+                # cursor.execute(SQL1, ("1111",))
+                cursor.execute(SQL1, (str(rnd)[-4:],))
+            elif atom == "int":
+                # cursor.execute(SQL2 % (rnd, rnd+3))
+                cursor.execute(SQL2 % (rnd, rnd + dselect))
+            elif atom == "float":
+                # cursor.execute(SQL3 % (float(rnd), float(rnd+3)))
+                cursor.execute(SQL3 % (float(rnd), float(rnd + dselect)))
+            else:
+                raise ValueError(
+                    "atom must take a value in ['string','int','float']"
+                )
+            if i == 0:
+                t1 = clock() - time1
+                tcpu1 = cpuclock() - cpu1
+            else:
+                if indexmode == "indexed":
+                    # if indexed, wait until the 5th iteration to take
+                    # times (so as to ensure that the index is
+                    # effectively cached)
+                    if i >= 5:
+                        time2 += clock() - time1
+                        cpu2 += cpuclock() - cpu1
+                else:
+                    time2 += clock() - time1
+                    cpu2 += cpuclock() - cpu1
+        if riter > 1:
+            if indexmode == "indexed" and riter >= 5:
+                correction = 5
+            else:
+                correction = 1
+            t2 = time2 / (riter - correction)
+            tcpu2 = cpu2 / (riter - correction)
+
+        print(
+            f"*** Query results for atom = {atom}, "
+            f"nrows = {nrows}, indexmode = {indexmode} ***"
+        )
+        print(f"Query time: {t1:.5f}, cached time: {t2:.5f}")
+        print(f"MRows/s: {nrows / 1_000_000 / t1:.3f}", end=" ")
+        if t2 > 0:
+            print(f", cached MRows/s: {nrows / 10 ** 6 / t2:.3f}")
+        else:
+            print()
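+        # The averaging above discards warm-up iterations: with riter
+        # runs, the first one timed separately as the cold query, the
+        # cached estimate is
+        #     t2 = time2 / (riter - correction)
+        # where correction == 5 for indexed runs (iterations 1-4 are
+        # skipped while the index settles in cache) and 1 otherwise.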
+ # Collect benchmark data + recsize = "sqlite_small" + tablepath = "/" + recsize + "/search/" + indexmode + "/" + atom + table = bf.get_node(tablepath) + table.row["nrows"] = nrows + table.row["rowsel"] = rowsel + table.row["time1"] = t1 + table.row["time2"] = t2 + table.row["tcpu1"] = tcpu1 + table.row["tcpu2"] = tcpu2 + table.row["psyco"] = psycon + table.row["rowsec1"] = nrows / t1 + if t2 > 0: + table.row["rowsec2"] = nrows / t2 + table.row.append() + table.flush() # Flush the data + + # Close the database + conn.close() + bf.close() # the bench database + + return + + +if __name__ == "__main__": + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = ( + """usage: %s [-v] [-p] [-R] [-h] [-t] [-r] [-w] [-n nrows] [-b file] [-k riter] [-m indexmode] [-N range] datafile + -v verbose + -p use "psyco" if available + -R use Random values for filling + -h heavy mode (exclude strings from timings) + -t worsT searching case (to emulate PyTables worst cases) + -r only read test + -w only write test + -n the number of rows (in krows) + -b bench filename + -N introduce (uniform) noise within range into the values + -d the interval for look values (int, float) at. Default is 3. + -k number of iterations for reading\n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpRhtrwn:b:k:m:N:d:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + dselect = 3 + noise = 0 + verbose = 0 + heavy = 0 + testread = 1 + testwrite = 1 + usepsyco = 0 + nrows = 1000 + bfile = "sqlite-bench.h5" + supported_imodes = ["indexed", "standard"] + indexmode = "indexed" + riter = 2 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + if option[0] == "-p": + usepsyco = 1 + elif option[0] == "-R": + randomvalues = 1 + elif option[0] == "-h": + heavy = 1 + elif option[0] == "-t": + worst = 1 + elif option[0] == "-r": + testwrite = 0 + elif option[0] == "-w": + testread = 0 + elif option[0] == "-b": + bfile = option[1] + elif option[0] == "-N": + noise = float(option[1]) + elif option[0] == "-m": + indexmode = option[1] + if indexmode not in supported_imodes: + raise ValueError( + "Indexmode should be any of '%s' and you passed '%s'" + % (supported_imodes, indexmode) + ) + elif option[0] == "-n": + nrows = int(float(option[1]) * 1000) + elif option[0] == "-d": + dselect = float(option[1]) + elif option[0] == "-k": + riter = int(option[1]) + + # remaining parameters + dbfile = pargs[0] + + if worst: + nrows -= 1 # the worst case + + # Create the benchfile (if needed) + if not Path(bfile).exists(): + create_new_bench_file(bfile, verbose) + + if testwrite: + if psyco_imported and usepsyco: + psyco.bind(create_file) + psycon = 1 + create_file( + dbfile, nrows, None, indexmode, heavy, noise, bfile, verbose + ) + + if testread: + if psyco_imported and usepsyco: + psyco.bind(read_file) + psycon = 1 + read_file(dbfile, nrows, indexmode, heavy, dselect, bfile, riter) diff --git a/bench/sqlite-search-bench.sh b/bench/sqlite-search-bench.sh new file mode 100755 index 0000000..7d7e234 --- /dev/null +++ b/bench/sqlite-search-bench.sh @@ -0,0 +1,96 @@ +#!/bin/sh +# I don't know why, but the /usr/bin/python2.3 from Debian is a 30% slower +# than my own compiled version! 
2004-08-18
+python="/usr/local/bin/python2.3 -O"
+
+writedata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+    cmd="${python} sqlite-search-bench.py -b ${bfile} ${psyco} -m ${smode} -w -n ${nrows} data.nobackup/sqlite-${nrows}k-${smode}.h5"
+    echo ${cmd}
+    ${cmd}
+}
+
+readdata () {
+    nrows=$1
+    bfile=$2
+    smode=$3
+    psyco=$4
+
+    if [ "$smode" = "indexed" ]; then
+        repeats=100
+    else
+        repeats=2
+    fi
+    cmd="${python} sqlite-search-bench.py -b ${bfile} ${psyco} -n ${nrows} -m ${smode} -r -k ${repeats} data.nobackup/sqlite-${nrows}k-${smode}.h5"
+    echo ${cmd}
+    ${cmd}
+    # Finally, delete the source (if desired)
+    #rm -f data.nobackup/sqlite-${nrows}k-${smode}.h5
+    return
+}
+
+overwrite=0
+# Inside [ ], ">" is a redirection; use -ge for the numeric comparison
+if [ $# -ge 1 ]; then
+    if [ "$1" = "-o" ]; then
+        overwrite=1
+    fi
+fi
+if [ $# -ge 2 ]; then
+    psyco=$2
+fi
+
+# The name of the data bench file
+bfile="sqlite-dbench.h5"
+
+# Move out a possible previous benchmark file
+bn=`basename $bfile ".h5"`
+mv -f ${bn}-bck2.h5 ${bn}-bck3.h5
+mv -f ${bn}-bck.h5 ${bn}-bck2.h5
+if [ "$overwrite" = "1" ]; then
+    echo "moving ${bn}.h5 to ${bn}-bck.h5"
+    mv -f ${bn}.h5 ${bn}-bck.h5
+else
+    echo "copying ${bn}.h5 to ${bn}-bck.h5"
+    cp -f ${bn}.h5 ${bn}-bck.h5
+fi
+
+# Configuration for testing
+nrowsliststd="1 2 5 10 20 50"
+#nrowslistidx="1 2 5 10 20 50"
+#nrowsliststd="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000 20000 50000"
+nrowslistidx="1 2 5 10 20 50 100 200 500 1000 2000 5000 10000"
+
+#for smode in standard indexed; do
+for smode in indexed; do
+    echo
+    echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    echo "Entering ${smode} mode..."
+    echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
+    echo
+    if [ "$smode" = "standard" ]; then
+        nrowslist=$nrowsliststd
+    else
+        nrowslist=$nrowslistidx
+    fi
+    # Write data files
+#    for nrows in $nrowslist; do
+#        echo "*************************************************************"
+#        echo "Writing for nrows=$nrows Krows, $smode, psyco=$psyco"
+#        echo "*************************************************************"
+#        writedata ${nrows} ${bfile} "${smode}" "${psyco}"
+#    done
+    # Read data files
+    ${python} cacheout.py
+    for nrows in $nrowslist; do
+        echo "***********************************************************"
+        echo "Searching for nrows=$nrows Krows, $smode, psyco=$psyco"
+        echo "***********************************************************"
+        readdata ${nrows} ${bfile} "${smode}" "${psyco}"
+    done
+done
+
+echo "New data available on: $bfile"
+exit 0
diff --git a/bench/sqlite3-search-bench.py b/bench/sqlite3-search-bench.py
new file mode 100644
index 0000000..c2f95f5
--- /dev/null
+++ b/bench/sqlite3-search-bench.py
@@ -0,0 +1,199 @@
+import random
+from time import perf_counter as clock
+from pathlib import Path
+
+import numpy as np
+
+# in order to always generate the same random sequence
+random.seed(19)
+
+
+def fill_arrays(start, stop):
+    col_i = np.arange(start, stop, dtype=np.int32)
+    if userandom:
+        col_j = np.random.uniform(0, nrows, stop - start)
+    else:
+        col_j = np.array(col_i, dtype=np.float64)
+    return col_i, col_j
+
+
+# Generator to ensure PyTables benchmark compatibility
+
+
+def int_generator(nrows):
+    step = 1000 * 100
+    j = 0
+    for i in range(nrows):
+        if i >= step * j:
+            stop = (j + 1) * step
+            if stop > nrows:  # Seems unnecessary
+                stop = nrows
+            col_i, col_j = fill_arrays(i, stop)
+            j += 1
+            k = 0
+        yield (col_i[k], col_j[k])
+        k += 1
+
+
+def int_generator_slow(nrows):
+    for i in range(nrows):
+        if userandom:
+            yield (i, float(random.randint(0, nrows)))
else: + yield (i, float(i)) + + +def open_db(filename, remove=0): + if remove and Path(filename).is_file(): + Path(filename).unlink() + con = sqlite.connect(filename) + cur = con.cursor() + return con, cur + + +def create_db(filename, nrows): + con, cur = open_db(filename, remove=1) + cur.execute("create table ints(i integer, j real)") + t1 = clock() + # This is twice as fast as a plain loop + cur.executemany("insert into ints(i,j) values (?,?)", int_generator(nrows)) + con.commit() + ctime = clock() - t1 + if verbose: + print(f"insert time: {ctime:.5f}") + print(f"Krows/s: {nrows / 1000 / ctime:.5f}") + close_db(con, cur) + + +def index_db(filename): + con, cur = open_db(filename) + t1 = clock() + cur.execute("create index ij on ints(j)") + con.commit() + itime = clock() - t1 + if verbose: + print(f"index time: {itime:.5f}") + print(f"Krows/s: {nrows / itime:.5f}") + # Close the DB + close_db(con, cur) + + +def query_db(filename, rng): + con, cur = open_db(filename) + t1 = clock() + ntimes = 10 + for i in range(ntimes): + # between clause does not seem to take advantage of indexes + # cur.execute("select j from ints where j between %s and %s" % \ + cur.execute( + "select i from ints where j >= %s and j <= %s" + % + # cur.execute("select i from ints where i >= %s and i <= + # %s" % + (rng[0] + i, rng[1] + i) + ) + results = cur.fetchall() + con.commit() + qtime = (clock() - t1) / ntimes + if verbose: + print(f"query time: {qtime:.5f}") + print(f"Mrows/s: {nrows / 1000 / qtime:.5f}") + print(results) + close_db(con, cur) + + +def close_db(con, cur): + cur.close() + con.close() + + +if __name__ == "__main__": + import sys + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = ( + """usage: %s [-v] [-p] [-m] [-i] [-q] [-c] [-R range] [-n nrows] file + -v verbose + -p use "psyco" if available + -m use random values to fill the table + -q do query + -c create the database + -i index the table + -2 use sqlite2 (default is use sqlite3) + -R select a range in a field in the form "start,stop" (def "0,10") + -n sets the number of rows (in krows) in each table + \n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpmiqc2R:n:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + usepsyco = 0 + userandom = 0 + docreate = 0 + createindex = 0 + doquery = 0 + sqlite_version = "3" + rng = [0, 10] + nrows = 1 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + elif option[0] == "-p": + usepsyco = 1 + elif option[0] == "-m": + userandom = 1 + elif option[0] == "-i": + createindex = 1 + elif option[0] == "-q": + doquery = 1 + elif option[0] == "-c": + docreate = 1 + elif option[0] == "-2": + sqlite_version = "2" + elif option[0] == "-R": + rng = [int(i) for i in option[1].split(",")] + elif option[0] == "-n": + nrows = int(option[1]) + + # Catch the hdf5 file passed as the last argument + filename = pargs[0] + + if sqlite_version == "2": + import sqlite + else: + from pysqlite2 import dbapi2 as sqlite + + if verbose: + print("pysqlite version:", sqlite.version) + if userandom: + print("using random values") + + if docreate: + if verbose: + print("writing %s krows" % nrows) + if psyco_imported and usepsyco: + psyco.bind(create_db) + nrows *= 1000 + create_db(filename, nrows) + + if createindex: + index_db(filename) + + if doquery: + query_db(filename, rng) diff --git a/bench/stress-test.py b/bench/stress-test.py new file mode 100644 index 
0000000..915c310 --- /dev/null +++ b/bench/stress-test.py @@ -0,0 +1,451 @@ +import gc +import sys +from time import perf_counter as clock +from time import process_time as cpuclock + +import numpy as np + +import tables as tb + + +class Test(tb.IsDescription): + ngroup = tb.Int32Col(pos=1) + ntable = tb.Int32Col(pos=2) + nrow = tb.Int32Col(pos=3) + # string = StringCol(itemsize=500, pos=4) + + +TestDict = { + "ngroup": tb.Int32Col(pos=1), + "ntable": tb.Int32Col(pos=2), + "nrow": tb.Int32Col(pos=3), +} + + +def create_file_arr(filename, ngroups, ntables, nrows): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = tb.open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + fileh.create_group("/", "group%04d" % k, "Group %d" % k) + + fileh.close() + + # Now, create the arrays + arr = np.arange(nrows) + for k in range(ngroups): + fileh = tb.open_file(filename, mode="a", root_uep="group%04d" % k) + for j in range(ntables): + # Create the array + fileh.create_array("/", "array%04d" % j, arr, "Array %d" % j) + fileh.close() + + return (ngroups * ntables * nrows, 4) + + +def read_file_arr(filename, ngroups, recsize, verbose): + + rowsread = 0 + for ngroup in range(ngroups): + fileh = tb.open_file(filename, mode="r", root_uep="group%04d" % ngroup) + # Get the group + group = fileh.root + narrai = 0 + if verbose: + print("Group ==>", group) + for arrai in fileh.list_nodes(group, "Array"): + if verbose > 1: + print("Array ==>", arrai) + print("Rows in", arrai._v_pathname, ":", arrai.shape) + + arr = arrai.read() + + rowsread += len(arr) + narrai += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, 4, rowsread * 4) + + +def create_file( + filename, ngroups, ntables, nrows, complevel, complib, recsize +): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = tb.open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + group = fileh.create_group("/", "group%04d" % k, "Group %d" % k) + + fileh.close() + + # Now, create the tables + rowswritten = 0 + if not ntables: + rowsize = 0 + + for k in range(ngroups): + print("Filling tables in group:", k) + fileh = tb.open_file(filename, mode="a", root_uep="group%04d" % k) + # Get the group + group = fileh.root + for j in range(ntables): + # Create a table + # table = fileh.create_table(group, 'table%04d'% j, Test, + table = fileh.create_table( + group, + "table%04d" % j, + TestDict, + "Table%04d" % j, + complevel, + complib, + nrows, + ) + rowsize = table.rowsize + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(nrows): + row["ngroup"] = k + row["ntable"] = j + row["nrow"] = i + row.append() + + rowswritten += nrows + table.flush() + + # Close the file + fileh.close() + + return (rowswritten, rowsize) + + +def read_file(filename, ngroups, recsize, verbose): + # Open the HDF5 file in read-only mode + + rowsize = 0 + buffersize = 0 + rowsread = 0 + for ngroup in range(ngroups): + fileh = tb.open_file(filename, mode="r", root_uep="group%04d" % ngroup) + # Get the group + group = fileh.root + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, "Table"): + rowsize = table.rowsize + buffersize = table.rowsize * table.nrowsinbuf + if verbose > 1: + print("Table ==>", table) + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) 
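+                # rowsize * nrowsinbuf is roughly the I/O buffer size in
+                # bytes: nrowsinbuf is how many table rows PyTables moves
+                # per internal buffer.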
+ print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + nrow = 0 + if table.nrows > 0: # only read if we have rows in tables + for row in table: + try: + assert row["ngroup"] == ngroup + assert row["ntable"] == ntable + assert row["nrow"] == nrow + except Exception: + print( + "Error in group: %d, table: %d, row: %d" + % (ngroup, ntable, nrow) + ) + print("Record ==>", row) + nrow += 1 + + assert nrow == table.nrows + rowsread += table.nrows + ntable += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, rowsize, buffersize) + + +class TrackRefs: + """Object to track reference counts across test runs.""" + + def __init__(self, verbose=0): + self.type2count = {} + self.type2all = {} + self.verbose = verbose + + def update(self, verbose=0): + obs = sys.getobjects(0) + type2count = {} + type2all = {} + for o in obs: + all_ = sys.getrefcount(o) + t = type(o) + if verbose: + # if t == types.TupleType: + if isinstance(o, tb.Group): + # if isinstance(o, MetaIsDescription): + print("-->", o, "refs:", all_) + refrs = gc.get_referrers(o) + trefrs = [] + for refr in refrs: + trefrs.append(type(refr)) + print("Referrers -->", refrs) + print("Referrers types -->", trefrs) + # if t == types.StringType: print "-->",o + if t in type2count: + type2count[t] += 1 + type2all[t] += all_ + else: + type2count[t] = 1 + type2all[t] = all_ + + ct = sorted( + [ + ( + type2count[t] - self.type2count.get(t, 0), + type2all[t] - self.type2all.get(t, 0), + t, + ) + for t in type2count.keys() + ] + ) + ct.reverse() + for delta1, delta2, t in ct: + if delta1 or delta2: + print("%-55s %8d %8d" % (t, delta1, delta2)) + + self.type2count = type2count + self.type2all = type2all + + +def dump_refs(preheat=10, iter1=10, iter2=10, *testargs): + + rc1 = rc2 = None + # testMethod() + for i in range(preheat): + test_method(*testargs) + gc.collect() + rc1 = sys.gettotalrefcount() + track = TrackRefs() + for i in range(iter1): + test_method(*testargs) + print("First output of TrackRefs:") + gc.collect() + rc2 = sys.gettotalrefcount() + track.update() + print( + "Inc refs in function testMethod --> %5d" % (rc2 - rc1), + file=sys.stderr, + ) + for i in range(iter2): + test_method(*testargs) + track.update(verbose=1) + print("Second output of TrackRefs:") + gc.collect() + rc3 = sys.gettotalrefcount() + + print( + "Inc refs in function testMethod --> %5d" % (rc3 - rc2), + file=sys.stderr, + ) + + +def dump_garbage(): + """show us waht the garbage is about.""" + # Force collection + print("\nGARBAGE:") + gc.collect() + + print("\nGARBAGE OBJECTS:") + for x in gc.garbage: + s = str(x) + # if len(s) > 80: s = s[:77] + "..." 
+ print(type(x), "\n ", s) + + # print "\nTRACKED OBJECTS:" + # reportLoggedInstances("*") + + +def test_method( + file, + usearray, + testwrite, + testread, + complib, + complevel, + ngroups, + ntables, + nrows, +): + + if complevel > 0: + print("Compression library:", complib) + if testwrite: + t1 = clock() + cpu1 = cpuclock() + if usearray: + (rowsw, rowsz) = create_file_arr(file, ngroups, ntables, nrows) + else: + (rowsw, rowsz) = create_file( + file, ngroups, ntables, nrows, complevel, complib, recsize + ) + t2 = clock() + cpu2 = cpuclock() + tapprows = t2 - t1 + cpuapprows = cpu2 - cpu1 + print(f"Rows written: {rowsw} Row size: {rowsz}") + print( + f"Time writing rows: {tapprows:.3f} s (real) " + f"{cpuapprows:.3f} s (cpu) {cpuapprows / tapprows:.0%}" + ) + print(f"Write rows/sec: {rowsw / tapprows}") + print(f"Write KB/s : {rowsw * rowsz / (tapprows * 1024):.0f}") + + if testread: + t1 = clock() + cpu1 = cpuclock() + if usearray: + (rowsr, rowsz, bufsz) = read_file_arr( + file, ngroups, recsize, verbose + ) + else: + (rowsr, rowsz, bufsz) = read_file(file, ngroups, recsize, verbose) + t2 = clock() + cpu2 = cpuclock() + treadrows = t2 - t1 + cpureadrows = cpu2 - cpu1 + print(f"Rows read: {rowsw} Row size: {rowsz}, Buf size: {bufsz}") + print( + f"Time reading rows: {treadrows:.3f} s (real) " + f"{cpureadrows:.3f} s (cpu) {cpureadrows / treadrows:.0%}" + ) + print(f"Read rows/sec: {rowsr / treadrows}") + print(f"Read KB/s : {rowsr * rowsz / (treadrows * 1024):.0f}") + + +if __name__ == "__main__": + import getopt + import profile + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file + -d debugging level + -v verbosity level + -p use "psyco" if available + -a use Array objects instead of Table + -r only read test + -w only write test + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -c sets a compression level (do not set it or 0 for no compression) + -g number of groups hanging from "/" + -t number of tables per group + -i number of rows per table +""" + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "d:v:parwl:c:g:t:i:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + ngroups = 5 + ntables = 5 + nrows = 100 + verbose = 0 + debug = 0 + recsize = "medium" + testread = 1 + testwrite = 1 + usepsyco = 0 + usearray = 0 + complevel = 0 + complib = "zlib" + + # Get the options + for option in opts: + if option[0] == "-d": + debug = int(option[1]) + if option[0] == "-v": + verbose = int(option[1]) + if option[0] == "-p": + usepsyco = 1 + if option[0] == "-a": + usearray = 1 + elif option[0] == "-r": + testwrite = 0 + elif option[0] == "-w": + testread = 0 + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-c": + complevel = int(option[1]) + elif option[0] == "-g": + ngroups = int(option[1]) + elif option[0] == "-t": + ntables = int(option[1]) + elif option[0] == "-i": + nrows = int(option[1]) + + if debug: + gc.enable() + + if debug == 1: + gc.set_debug(gc.DEBUG_LEAK) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + if psyco_imported and usepsyco: + psyco.bind(create_file) + psyco.bind(read_file) + + if debug == 2: + dump_refs( + 10, + 10, + 15, + file, + usearray, + testwrite, + testread, + complib, + complevel, 
+            ngroups,
+            ntables,
+            nrows,
+        )
+    else:
+        # test_method(file, usearray, testwrite, testread, complib,
+        #             complevel, ngroups, ntables, nrows)
+        # profile.run() executes the statement string in __main__, where
+        # all of these names are defined
+        profile.run(
+            "test_method(file, usearray, testwrite, testread, "
+            + "complib, complevel, ngroups, ntables, nrows)"
+        )
+
+    # Show the dirt
+    if debug == 1:
+        dump_garbage()
diff --git a/bench/stress-test2.py b/bench/stress-test2.py
new file mode 100644
index 0000000..f95b9db
--- /dev/null
+++ b/bench/stress-test2.py
@@ -0,0 +1,255 @@
+import gc
+import sys
+import random
+from time import perf_counter as clock
+from time import process_time as cpuclock
+
+import tables as tb
+
+
+class Test(tb.IsDescription):
+    ngroup = tb.Int32Col(pos=1)
+    ntable = tb.Int32Col(pos=2)
+    nrow = tb.Int32Col(pos=3)
+    time = tb.Float64Col(pos=5)
+    random = tb.Float32Col(pos=4)
+
+
+def create_file(
+    filename, ngroups, ntables, nrows, complevel, complib, recsize
+):
+
+    # First, create the groups
+
+    # Open a file in "w"rite mode
+    fileh = tb.open_file(filename, mode="w", title="PyTables Stress Test")
+
+    for k in range(ngroups):
+        # Create the group
+        group = fileh.create_group("/", "group%04d" % k, "Group %d" % k)
+
+    fileh.close()
+
+    # Now, create the tables
+    rowswritten = 0
+    for k in range(ngroups):
+        fileh = tb.open_file(filename, mode="a", root_uep="group%04d" % k)
+        # Get the group
+        group = fileh.root
+        for j in range(ntables):
+            # Create a table (filters built as in stress-test3.py)
+            table = fileh.create_table(
+                group,
+                "table%04d" % j,
+                Test,
+                "Table%04d" % j,
+                tb.Filters(complevel, complib),
+                nrows,
+            )
+            # Get the row object associated with the new table
+            row = table.row
+            # Fill the table
+            for i in range(nrows):
+                row["time"] = clock()
+                row["random"] = random.random() * 40 + 100
+                row["ngroup"] = k
+                row["ntable"] = j
+                row["nrow"] = i
+                row.append()
+
+            rowswritten += nrows
+            table.flush()
+
+        # Close the file
+        fileh.close()
+
+    return (rowswritten, table.rowsize)
+
+
+def read_file(filename, ngroups, recsize, verbose):
+    # Open the HDF5 file in read-only mode
+
+    rowsread = 0
+    for ngroup in range(ngroups):
+        fileh = tb.open_file(filename, mode="r", root_uep="group%04d" % ngroup)
+        # Get the group
+        group = fileh.root
+        ntable = 0
+        if verbose:
+            print("Group ==>", group)
+        for table in fileh.list_nodes(group, "Table"):
+            rowsize = table.rowsize
+            buffersize = table.rowsize * table.nrowsinbuf
+            if verbose > 1:
+                print("Table ==>", table)
+                print("Max rows in buf:", table.nrowsinbuf)
+                print("Rows in", table._v_pathname, ":", table.nrows)
+                print("Buffersize:", table.rowsize * table.nrowsinbuf)
+                print("MaxTuples:", table.nrowsinbuf)
+
+            nrow = 0
+            time_1 = 0.0
+            for row in table:
+                try:
+                    # print "row['ngroup'], ngroup ==>", row["ngroup"], ngroup
+                    assert row["ngroup"] == ngroup
+                    assert row["ntable"] == ntable
+                    assert row["nrow"] == nrow
+                    # print "row['time'], time_1 ==>", row["time"], time_1
+                    assert row["time"] >= (time_1 - 0.01)
+                    # assert 100 <= row["random"] <= 139.999
+                    assert 100 <= row["random"] <= 140
+                except Exception:
+                    print(
+                        "Error in group: %d, table: %d, row: %d"
+                        % (ngroup, ntable, nrow)
+                    )
+                    print("Record ==>", row)
+                time_1 = row["time"]
+                nrow += 1
+
+            assert nrow == table.nrows
+            rowsread += table.nrows
+            ntable += 1
+
+        # Close the file (eventually destroy the extended type)
+        fileh.close()
+
+    return (rowsread, rowsize, buffersize)
+
+
+def dump_garbage():
+    """Show us what the garbage is about."""
+    # Force collection
+    print("\nGARBAGE:")
+    gc.collect()
+
+    print("\nGARBAGE OBJECTS:")
+    for x in gc.garbage:
+        s = str(x)
+        # if len(s) > 80: s = s[:77] +
"..." + print(type(x), "\n ", s) + + +if __name__ == "__main__": + import getopt + + try: + import psyco + + psyco_imported = 1 + except Exception: + psyco_imported = 0 + + usage = """usage: %s [-d debug] [-v level] [-p] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file + -d debugging level + -v verbosity level + -p use "psyco" if available + -r only read test + -w only write test + -l sets the compression library to be used ("zlib", "lzo", "ucl", "bzip2") + -c sets a compression level (do not set it or 0 for no compression) + -g number of groups hanging from "/" + -t number of tables per group + -i number of rows per table +""" + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "d:v:prwl:c:g:t:i:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + ngroups = 5 + ntables = 5 + nrows = 100 + verbose = 0 + debug = 0 + recsize = "medium" + testread = 1 + testwrite = 1 + usepsyco = 0 + complevel = 0 + complib = "zlib" + + # Get the options + for option in opts: + if option[0] == "-d": + debug = int(option[1]) + if option[0] == "-v": + verbose = int(option[1]) + if option[0] == "-p": + usepsyco = 1 + elif option[0] == "-r": + testwrite = 0 + elif option[0] == "-w": + testread = 0 + elif option[0] == "-l": + complib = option[1] + elif option[0] == "-c": + complevel = int(option[1]) + elif option[0] == "-g": + ngroups = int(option[1]) + elif option[0] == "-t": + ntables = int(option[1]) + elif option[0] == "-i": + nrows = int(option[1]) + + if debug: + gc.enable() + gc.set_debug(gc.DEBUG_LEAK) + + # Catch the hdf5 file passed as the last argument + file = pargs[0] + + print("Compression level:", complevel) + if complevel > 0: + print("Compression library:", complib) + if testwrite: + t1 = clock() + cpu1 = cpuclock() + if psyco_imported and usepsyco: + psyco.bind(create_file) + (rowsw, rowsz) = create_file( + file, ngroups, ntables, nrows, complevel, complib, recsize + ) + t2 = clock() + cpu2 = cpuclock() + tapprows = t2 - t1 + cpuapprows = cpu2 - cpu1 + print(f"Rows written: {rowsw} Row size: {rowsz}") + print( + f"Time writing rows: {tapprows:.3f} s (real) " + f"{cpuapprows:.3f} s (cpu) {cpuapprows / tapprows:.0%}" + ) + print(f"Write rows/sec: {rowsw / tapprows}") + print(f"Write KB/s : {rowsw * rowsz / (tapprows * 1024):.0f}") + + if testread: + t1 = clock() + cpu1 = cpuclock() + if psyco_imported and usepsyco: + psyco.bind(read_file) + (rowsr, rowsz, bufsz) = read_file(file, ngroups, recsize, verbose) + t2 = clock() + cpu2 = cpuclock() + treadrows = t2 - t1 + cpureadrows = cpu2 - cpu1 + print(f"Rows read: {rowsw} Row size: {rowsz}, Buf size: {bufsz}") + print( + f"Time reading rows: {treadrows:.3f} s (real) " + f"{cpureadrows:.3f} s (cpu) {cpureadrows / treadrows:.0%}" + ) + print(f"Read rows/sec: {rowsr / treadrows}") + print(f"Read KB/s : {rowsr * rowsz / (treadrows * 1024):.0f}") + + # Show the dirt + if debug > 1: + dump_garbage() diff --git a/bench/stress-test3.py b/bench/stress-test3.py new file mode 100644 index 0000000..1d85f3c --- /dev/null +++ b/bench/stress-test3.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python + +"""This script allows to create arbitrarily large files with the desired +combination of groups, tables per group and rows per table. + +Issue "python stress-test3.py" without parameters for a help on usage. 
+ +""" + +import gc +import sys +from time import perf_counter as clock +from time import process_time as cpuclock + +import tables as tb + + +class Test(tb.IsDescription): + ngroup = tb.Int32Col(pos=1) + ntable = tb.Int32Col(pos=2) + nrow = tb.Int32Col(pos=3) + float_ = tb.Float32Col(pos=3) + # string = tb.StringCol(500, pos=4) + + +def create_file( + filename, ngroups, ntables, nrows, complevel, complib, recsize +): + + # First, create the groups + + # Open a file in "w"rite mode + fileh = tb.open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + group = fileh.create_group("/", "group%04d" % k, "Group %d" % k) + + fileh.close() + + # Now, create the tables + rowswritten = 0 + rowsize = 0 + for k in range(ngroups): + fileh = tb.open_file(filename, mode="a", root_uep="group%04d" % k) + # Get the group + group = fileh.root + for j in range(ntables): + # Create a table + table = fileh.create_table( + group, + "table%04d" % j, + Test, + "Table%04d" % j, + tb.Filters(complevel, complib), + nrows, + ) + rowsize = table.rowsize + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(nrows): + row["ngroup"] = k + row["ntable"] = j + row["nrow"] = i + row.append() + + rowswritten += nrows + table.flush() + + # Close the file + fileh.close() + + return (rowswritten, rowsize) + + +def read_file(filename, ngroups, recsize, verbose): + # Open the HDF5 file in read-only mode + + rowsread = 0 + rowsize = 0 + buffersize = 0 + for ngroup in range(ngroups): + fileh = tb.open_file(filename, mode="r") + # Get the group + group = fileh.get_node(fileh.root, "group%04d" % ngroup) + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, "Leaf"): + rowsize = table.rowsize + buffersize = table.rowsize * table.nrowsinbuf + if verbose > 1: + print("Table ==>", table) + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + nrow = 0 + for row in table: + try: + assert row["ngroup"] == ngroup + assert row["ntable"] == ntable + assert row["nrow"] == nrow + except Exception: + print( + "Error in group: %d, table: %d, row: %d" + % (ngroup, ntable, nrow) + ) + print("Record ==>", row) + nrow += 1 + + assert nrow == table.nrows + rowsread += table.nrows + ntable += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, rowsize, buffersize) + + +def dump_garbage(): + """show us what the garbage is about.""" + # Force collection + print("\nGARBAGE:") + gc.collect() + + print("\nGARBAGE OBJECTS:") + for x in gc.garbage: + s = str(x) + # if len(s) > 80: s = s[:77] + "..." 
+        print(type(x), "\n ", s)
+
+
+if __name__ == "__main__":
+    import getopt
+
+    usage = """usage: %s [-d debug] [-v level] [-r] [-w] [-l complib] [-c complevel] [-g ngroups] [-t ntables] [-i nrows] file
+    -d debugging level
+    -v verbosity level
+    -r only read test
+    -w only write test
+    -l sets the compression library to be used ("zlib", "bzip2", "blosc", "blosc:codec")
+    -c sets a compression level (do not set it or 0 for no compression)
+    -g number of groups hanging from "/"
+    -t number of tables per group
+    -i number of rows per table
+""" % sys.argv[0]
+
+    try:
+        opts, pargs = getopt.getopt(sys.argv[1:], "d:v:rwl:c:g:t:i:")
+    except Exception:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # if we pass too many parameters, abort
+    if len(pargs) != 1:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # default options
+    ngroups = 5
+    ntables = 5
+    nrows = 100
+    verbose = 0
+    debug = 0
+    recsize = "medium"
+    testread = 1
+    testwrite = 1
+    complevel = 0
+    complib = "zlib"
+
+    # Get the options
+    for option in opts:
+        if option[0] == "-d":
+            debug = int(option[1])
+        elif option[0] == "-v":
+            verbose = int(option[1])
+        elif option[0] == "-r":
+            testwrite = 0
+        elif option[0] == "-w":
+            testread = 0
+        elif option[0] == "-l":
+            complib = option[1]
+        elif option[0] == "-c":
+            complevel = int(option[1])
+        elif option[0] == "-g":
+            ngroups = int(option[1])
+        elif option[0] == "-t":
+            ntables = int(option[1])
+        elif option[0] == "-i":
+            nrows = int(option[1])
+
+    if debug:
+        gc.enable()
+        gc.set_debug(gc.DEBUG_LEAK)
+
+    # Catch the hdf5 file passed as the last argument
+    file = pargs[0]
+
+    print("Compression level:", complevel)
+    if complevel > 0:
+        print("Compression library:", complib)
+    rowsw = 0
+    if testwrite:
+        t1 = clock()
+        cpu1 = cpuclock()
+        (rowsw, rowsz) = create_file(
+            file, ngroups, ntables, nrows, complevel, complib, recsize
+        )
+        t2 = clock()
+        cpu2 = cpuclock()
+        tapprows = t2 - t1
+        cpuapprows = cpu2 - cpu1
+        print(f"Rows written: {rowsw} Row size: {rowsz}")
+        print(
+            f"Time writing rows: {tapprows:.3f} s (real) "
+            f"{cpuapprows:.3f} s (cpu) {cpuapprows / tapprows:.0%}"
+        )
+        print(f"Write Krows/sec: {rowsw / (tapprows * 1000):.3f}")
+        print(f"Write MB/s : {rowsw * rowsz / (tapprows * 2**20):.3f}")
+
+    if testread:
+        t1 = clock()
+        cpu1 = cpuclock()
+        (rowsr, rowsz, bufsz) = read_file(file, ngroups, recsize, verbose)
+        t2 = clock()
+        cpu2 = cpuclock()
+        treadrows = t2 - t1
+        cpureadrows = cpu2 - cpu1
+        print(f"Rows read: {rowsr} Row size: {rowsz}, Buf size: {bufsz}")
+        print(
+            f"Time reading rows: {treadrows:.3f} s (real) "
+            f"{cpureadrows:.3f} s (cpu) {cpureadrows / treadrows:.0%}"
+        )
+        print(f"Read Krows/sec: {rowsr / (treadrows * 1000):.3f}")
+        print(f"Read MB/s : {rowsr * rowsz / (treadrows * 2**20):.3f}")
+
+    # Show the dirt
+    if debug > 1:
+        dump_garbage()
diff --git a/bench/table-bench.py b/bench/table-bench.py
new file mode 100644
index 0000000..8ee3d45
--- /dev/null
+++ b/bench/table-bench.py
@@ -0,0 +1,431 @@
+#!/usr/bin/env python
+
+import numpy as np
+
+import tables as tb
+
+
+class Small(tb.IsDescription):
+    var1 = tb.StringCol(itemsize=4, pos=2)
+    var2 = tb.Int32Col(pos=1)
+    var3 = tb.Float64Col(pos=0)
+
+
+# Define a user record to characterize some kind of particles
+
+
+class Medium(tb.IsDescription):
+    name = tb.StringCol(itemsize=16, pos=0)  # 16-character String
+    float1 = tb.Float64Col(shape=2, dflt=np.arange(2), pos=1)
+    # float1 = Float64Col(dflt=2.3)
+    # float2 = Float64Col(dflt=2.3)
+    # zADCcount = Int16Col()    # signed short integer
+    ADCcount = tb.Int32Col(pos=6)  # 
signed short integer + grid_i = tb.Int32Col(pos=7) # integer + grid_j = tb.Int32Col(pos=8) # integer + pressure = tb.Float32Col(pos=9) # float (single-precision) + energy = tb.Float64Col(pos=2) # double (double-precision) + # unalig = Int8Col() # just to unalign data + + +# Define a user record to characterize some kind of particles + + +class Big(tb.IsDescription): + name = tb.StringCol(itemsize=16) # 16-character String + float1 = tb.Float64Col(shape=32, dflt=np.arange(32)) + float2 = tb.Float64Col(shape=32, dflt=2.2) + TDCcount = tb.Int8Col() # signed short integer + # ADCcount = Int32Col() + # ADCcount = Int16Col() # signed short integer + grid_i = tb.Int32Col() # integer + grid_j = tb.Int32Col() # integer + pressure = tb.Float32Col() # float (single-precision) + energy = tb.Float64Col() # double (double-precision) + + +def create_file(filename, totalrows, filters, recsize): + + # Open a file in "w"rite mode + fileh = tb.open_file( + filename, mode="w", title="Table Benchmark", filters=filters + ) + + # Table title + title = "This is the table title" + + # Create a Table instance + group = fileh.root + rowswritten = 0 + for j in range(3): + # Create a table + if recsize == "big": + table = fileh.create_table( + group, "tuple" + str(j), Big, title, None, totalrows + ) + elif recsize == "medium": + table = fileh.create_table( + group, "tuple" + str(j), Medium, title, None, totalrows + ) + elif recsize == "small": + table = fileh.create_table( + group, "tuple" + str(j), Small, title, None, totalrows + ) + else: + raise RuntimeError("This should never happen") + + table.attrs.test = 2 + rowsize = table.rowsize + # Get the row object associated with the new table + d = table.row + # Fill the table + if recsize == "big": + for i in range(totalrows): + # d['name'] = 'Part: %6d' % (i) + d["TDCcount"] = i % 256 + # d['float1'] = NP.array([i]*32, NP.float64) + # d['float2'] = NP.array([i**2]*32, NP.float64) + # d['float1'][0] = float(i) + # d['float2'][0] = float(i*2) + # Common part with medium + d["grid_i"] = i + d["grid_j"] = 10 - i + d["pressure"] = float(i * i) + # d['energy'] = float(d['pressure'] ** 4) + d["energy"] = d["pressure"] + # d['idnumber'] = i * (2 ** 34) + d.append() + elif recsize == "medium": + for i in range(totalrows): + # d['name'] = 'Part: %6d' % (i) + # d['float1'] = NP.array([i]*2, NP.float64) + # d['float1'] = arr + # d['float1'] = i + # d['float2'] = float(i) + # Common part with big: + d["grid_i"] = i + d["grid_j"] = 10 - i + d["pressure"] = i * 2 + # d['energy'] = float(d['pressure'] ** 4) + d["energy"] = d["pressure"] + d.append() + else: # Small record + for i in range(totalrows): + # d['var1'] = str(random.randrange(1000000)) + # d['var3'] = random.randrange(10000000) + d["var1"] = str(i) + # d['var2'] = random.randrange(totalrows) + d["var2"] = i + # d['var3'] = 12.1e10 + d["var3"] = totalrows - i + d.append() # This is a 10% faster than table.append() + rowswritten += totalrows + + if recsize == "small": + # Testing with indexing + pass + # table._createIndex("var3", Filters(1,"zlib",shuffle=1)) + + # table.flush() + group._v_attrs.test2 = "just a test" + # Create a new group + group2 = fileh.create_group(group, "group" + str(j)) + # Iterate over this new group (group2) + group = group2 + table.flush() + + # Close the file (eventually destroy the extended type) + fileh.close() + return (rowswritten, rowsize) + + +def read_file(filename, recsize, verbose): + # Open the HDF5 file in read-only mode + + fileh = tb.open_file(filename, mode="r") + rowsread = 0 + for 
groupobj in fileh.walk_groups(fileh.root): + # print "Group pathname:", groupobj._v_pathname + row = 0 + for table in fileh.list_nodes(groupobj, "Table"): + rowsize = table.rowsize + print("reading", table) + if verbose: + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + if recsize == "big" or recsize == "medium": + # e = [ p.float1 for p in table.iterrows() + # if p.grid_i < 2 ] + # e = [ str(p) for p in table.iterrows() ] + # if p.grid_i < 2 ] + # e = [ p['grid_i'] for p in table.iterrows() + # if p['grid_j'] == 20 and p['grid_i'] < 20 ] + # e = [ p['grid_i'] for p in table + # if p['grid_i'] <= 2 ] + # e = [ p['grid_i'] for p in table.where("grid_i<=20")] + # e = [ p['grid_i'] for p in + # table.where('grid_i <= 20')] + e = [ + p["grid_i"] + for p in table.where("(grid_i <= 20) & (grid_j == 20)") + ] + # e = [ p['grid_i'] for p in table.iterrows() + # if p.nrow() == 20 ] + # e = [ table.delrow(p.nrow()) for p in table.iterrows() + # if p.nrow() == 20 ] + # The version with a for loop is only 1% better than + # comprenhension list + # e = [] + # for p in table.iterrows(): + # if p.grid_i < 20: + # e.append(p.grid_j) + else: # small record case + # e = [ p['var3'] for p in table.iterrows() + # if p['var2'] < 20 and p['var3'] < 20 ] + # e = [ p['var3'] for p in table.where("var3 <= 20") + # if p['var2'] < 20 ] + # e = [ p['var3'] for p in table.where("var3 <= 20")] + # Cuts 1) and 2) issues the same results but 2) is about 10 times faster + # Cut 1) + # e = [ p.nrow() for p in + # table.where(table.cols.var2 > 5) + # if p["var2"] < 10] + # Cut 2) + # e = [ p.nrow() for p in + # table.where(table.cols.var2 < 10) + # if p["var2"] > 5] + # e = [ (p._nrow,p["var3"]) for p in + # e = [ p["var3"] for p in + # table.where(table.cols.var3 < 10)] + # table.where(table.cols.var3 < 10)] + # table if p["var3"] <= 10] + # e = [ p['var3'] for p in table.where("var3 <= 20")] + # e = [ p['var3'] for p in + # table.where(table.cols.var1 == "10")] # More + # than ten times faster than the next one + # e = [ p['var3'] for p in table + # if p['var1'] == "10"] + # e = [ p['var3'] for p in table.where('var2 <= 20')] + e = [ + p["var3"] + for p in table.where("(var2 <= 20) & (var2 >= 3)") + ] + # e = [ p[0] for p in table.where('var2 <= 20')] + # e = [ p['var3'] for p in table if p['var2'] <= 20 ] + # e = [ p[:] for p in table if p[1] <= 20 ] + # e = [ p['var3'] for p in table._whereInRange(table.cols.var2 <=20)] + # e = [ p['var3'] for p in table.iterrows(0,21) ] + # e = [ p['var3'] for p in table.iterrows() + # if p.nrow() <= 20 ] + # e = [ p['var3'] for p in table.iterrows(1,0,1000)] + # e = [ p['var3'] for p in table.iterrows(1,100)] + # e = [ p['var3'] for p in table.iterrows(step=2) + # if p.nrow() < 20 ] + # e = [ p['var2'] for p in table.iterrows() + # if p['var2'] < 20 ] + # for p in table.iterrows(): + # pass + if verbose: + # print "Last record read:", p + print("resulting selection list ==>", e) + + rowsread += table.nrows + row += 1 + if verbose: + print("Total selected records ==> ", len(e)) + + # Close the file (eventually destroy the extended type) + fileh.close() + + return (rowsread, rowsize) + + +def read_field(filename, field, rng, verbose): + fileh = tb.open_file(filename, mode="r") + rowsread = 0 + if rng is None: + rng = [0, -1, 1] + if field == "all": + field = None + for groupobj in fileh.walk_groups(fileh.root): + for table in 
fileh.list_nodes(groupobj, "Table"):
+            rowsize = table.rowsize
+            # table.nrowsinbuf = 3  # For testing purposes
+            if verbose:
+                print("Max rows in buf:", table.nrowsinbuf)
+                print("Rows in", table._v_pathname, ":", table.nrows)
+                print("Buffersize:", table.rowsize * table.nrowsinbuf)
+                print("MaxTuples:", table.nrowsinbuf)
+                print(
+                    "(field, start, stop, step) ==>",
+                    (field, rng[0], rng[1], rng[2]),
+                )
+
+            e = table.read(rng[0], rng[1], rng[2], field)
+
+            rowsread += table.nrows
+            if verbose:
+                print("Selected rows ==> ", e)
+                print("Total selected rows ==> ", len(e))
+
+    # Close the file (eventually destroy the extended type)
+    fileh.close()
+    return (rowsread, rowsize)
+
+
+if __name__ == "__main__":
+    import sys
+    import getopt
+    from time import perf_counter as clock
+    from time import process_time as cpuclock
+
+    usage = (
+        """usage: %s [-v] [-P] [-R range] [-r] [-w] [-s recsize] [-f field] [-c level] [-l complib] [-n nrows] [-S] [-B] [-F] file
+    -v verbose
+    -P do profile
+    -R select a range in a field in the form "start,stop,step"
+    -r only read test
+    -w only write test
+    -s use [big] record, [medium] or [small]
+    -f only read stated field name in tables ("all" means all fields)
+    -c sets a compression level (do not set it or 0 for no compression)
+    -S activate shuffle filter
+    -B activate bitshuffle filter
+    -F activate fletcher32 filter
+    -l sets the compression library to be used ("zlib", "lzo", "blosc", "bzip2")
+    -n sets the number of rows in each table\n"""
+        % sys.argv[0]
+    )
+
+    try:
+        opts, pargs = getopt.getopt(sys.argv[1:], "vPSBFR:rwf:s:c:l:n:")
+    except Exception:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # if we pass too many parameters, abort
+    if len(pargs) != 1:
+        sys.stderr.write(usage)
+        sys.exit(0)
+
+    # default options
+    verbose = 0
+    profile = 0
+    rng = None
+    recsize = "medium"
+    field_name = None
+    testread = 1
+    testwrite = 1
+    complevel = 9
+    shuffle = 0
+    fletcher32 = 0
+    complib = "blosc2:blosclz"
+    nrows = 1_000_000
+
+    # Get the options
+    for option in opts:
+        if option[0] == "-v":
+            verbose = 1
+        elif option[0] == "-P":
+            profile = 1
+        elif option[0] == "-S":
+            shuffle = 1
+        elif option[0] == "-B":
+            shuffle = 2  # bitshuffle
+        elif option[0] == "-F":
+            fletcher32 = 1
+        elif option[0] == "-R":
+            rng = [int(i) for i in option[1].split(",")]
+        elif option[0] == "-r":
+            testwrite = 0
+        elif option[0] == "-w":
+            testread = 0
+        elif option[0] == "-f":
+            field_name = option[1]
+        elif option[0] == "-s":
+            recsize = option[1]
+            if recsize not in ["big", "medium", "small"]:
+                sys.stderr.write(usage)
+                sys.exit(0)
+        elif option[0] == "-c":
+            complevel = int(option[1])
+        elif option[0] == "-l":
+            complib = option[1]
+        elif option[0] == "-n":
+            nrows = int(option[1])
+
+    # Build the Filters instance
+    filters = tb.Filters(
+        complevel=complevel,
+        complib=complib,
+        shuffle=(shuffle == 1),
+        bitshuffle=(shuffle == 2),
+        fletcher32=fletcher32,
+    )
+
+    # Catch the hdf5 file passed as the last argument
+    file = pargs[0]
+
+    if verbose:
+        print("numpy version:", np.__version__)
+
+    if testwrite:
+        print("Compression level:", complevel)
+        if complevel > 0:
+            print("Compression library:", complib)
+            if shuffle == 1:
+                print("Shuffling...")
+            elif shuffle == 2:
+                print("Bitshuffling...")
+        t1 = clock()
+        cpu1 = cpuclock()
+        if profile:
+            import pstats
+            import profile as prof
+
+            prof.run(
+                "(rowsw, rowsz) = create_file(file, nrows, filters, "
+                "recsize)",
+                "table-bench.prof",
+            )
+            stats = pstats.Stats("table-bench.prof")
+            
stats.strip_dirs() + stats.sort_stats("time", "calls") + stats.print_stats(20) + else: + (rowsw, rowsz) = create_file(file, nrows, filters, recsize) + t2 = clock() + cpu2 = cpuclock() + tapprows = t2 - t1 + cpuapprows = cpu2 - cpu1 + print(f"Rows written: {rowsw} Row size: {rowsz}") + print( + f"Time writing rows: {tapprows:.3f} s (real) " + f"{cpuapprows:.3f} s (cpu) {cpuapprows / tapprows:.0%}" + ) + print(f"Write Mrows/sec: {rowsw / (tapprows * 1e6):.3f}") + print(f"Write MB/s : {rowsw * rowsz / (tapprows * 1024 * 1024):.3f}") + + if testread: + t1 = clock() + cpu1 = cpuclock() + if rng or field_name: + (rowsr, rowsz) = read_field(file, field_name, rng, verbose) + pass + else: + for i in range(1): + (rowsr, rowsz) = read_file(file, recsize, verbose) + t2 = clock() + cpu2 = cpuclock() + treadrows = t2 - t1 + cpureadrows = cpu2 - cpu1 + print(f"Rows read: {rowsr} Row size: {rowsz}") + print( + f"Time reading rows: {treadrows:.3f} s (real) " + f"{cpureadrows:.3f} s (cpu) {cpureadrows / treadrows:.0%}" + ) + print(f"Read Mrows/sec: {rowsr / (treadrows * 1e6):.3f}") + print(f"Read MB/s : {rowsr * rowsz / (treadrows * 1024 * 1024):.3f}") diff --git a/bench/table-copy.py b/bench/table-copy.py new file mode 100644 index 0000000..2196c9a --- /dev/null +++ b/bench/table-copy.py @@ -0,0 +1,119 @@ +from time import perf_counter as clock + +import numpy as np + +import tables as tb + +N = 10_000_000 + +filters = tb.Filters(9, "blosc2", shuffle=True) + + +def timed(func, *args, **kwargs): + start = clock() + res = func(*args, **kwargs) + print(f"{clock() - start:.3f}s elapsed.") + return res + + +def create_table(output_path): + print("creating array...", end=" ") + dt = np.dtype([("field%d" % i, int) for i in range(32)]) + a = np.zeros(N, dtype=dt) + print("done.") + + output_file = tb.open_file(output_path, mode="w") + table = output_file.create_table("/", "test", dt, filters=filters) + print("appending data...", end=" ") + table.append(a) + print("flushing...", end=" ") + table.flush() + print("done.") + output_file.close() + + +def copy1(input_path, output_path): + print(f"copying data from {input_path} to {output_path}...") + input_file = tb.open_file(input_path, mode="r") + output_file = tb.open_file(output_path, mode="w") + + # copy nodes as a batch + input_file.copy_node( + "/", output_file.root, recursive=True, filters=filters + ) + output_file.close() + input_file.close() + + +def copy2(input_path, output_path): + print(f"copying data from {input_path} to {output_path}...") + input_file = tb.open_file(input_path, mode="r") + input_file.copy_file(output_path, overwrite=True, filters=filters) + input_file.close() + + +def copy3(input_path, output_path): + print(f"copying data from {input_path} to {output_path}...") + input_file = tb.open_file(input_path, mode="r") + output_file = tb.open_file(output_path, mode="w", filters=filters) + table = input_file.root.test + table.copy(output_file.root) + output_file.close() + input_file.close() + + +def copy4(input_path, output_path): + print(f"copying data from {input_path} to {output_path}...") + input_file = tb.open_file(input_path, mode="r") + output_file = tb.open_file(output_path, mode="w", filters=filters) + + input_table = input_file.root.test + print("reading data...", end=" ") + start = clock() + data = input_file.root.test.read() + print(f"{clock() - start:.3f}s elapsed.") + print("done.") + + output_table = output_file.create_table("/", "test", input_table.dtype) + print("appending data...", end=" ") + start = clock() + 
output_table.append(data) + print("flushing...", end=" ") + output_table.flush() + print(f"{clock() - start:.3f}s elapsed.") + print("done.") + + input_file.close() + output_file.close() + + +def copy5(input_path, output_path): + print(f"copying data from {input_path} to {output_path}...") + input_file = tb.open_file(input_path, mode="r") + output_file = tb.open_file(output_path, mode="w", filters=filters) + + input_table = input_file.root.test + output_table = output_file.create_table("/", "test", input_table.dtype) + + chunksize = 100_000 + rowsleft = len(input_table) + start = 0 + for chunk in range(int(len(input_table) / chunksize) + 1): + stop = start + min(chunksize, rowsleft) + data = input_table.read(start, stop) + output_table.append(data) + output_table.flush() + rowsleft -= chunksize + start = stop + + input_file.close() + output_file.close() + + +if __name__ == "__main__": + timed(create_table, "tmp.h5") + timed(copy1, "tmp.h5", "test1.h5") + timed(copy2, "tmp.h5", "test2.h5") + timed(copy3, "tmp.h5", "test3.h5") + timed(copy4, "tmp.h5", "test4.h5") + timed(copy5, "tmp.h5", "test5.h5") diff --git a/bench/undo_redo.py b/bench/undo_redo.py new file mode 100644 index 0000000..5ee3bd5 --- /dev/null +++ b/bench/undo_redo.py @@ -0,0 +1,245 @@ +"""Benchmark for undo/redo. +Run this program without parameters for mode of use.""" + +from time import perf_counter as clock + +import numpy as np + +import tables as tb + +verbose = 0 + + +class BasicBenchmark: + + def __init__(self, filename, testname, vecsize, nobjects, niter): + + self.file = filename + self.test = testname + self.vecsize = vecsize + self.nobjects = nobjects + self.niter = niter + + # Initialize the arrays + self.a1 = np.arange(0, 1 * self.vecsize) + self.a2 = np.arange(1 * self.vecsize, 2 * self.vecsize) + self.a3 = np.arange(2 * self.vecsize, 3 * self.vecsize) + + def setUp(self): + + # Create an HDF5 file + self.fileh = tb.open_file(self.file, mode="w") + # open the do/undo + self.fileh.enable_undo() + + def tearDown(self): + self.fileh.disable_undo() + self.fileh.close() + # Remove the temporary file + # os.remove(self.file) + + def create_node(self): + """Checking a undo/redo create_array.""" + + for i in range(self.nobjects): + # Create a new array + self.fileh.create_array("/", "array" + str(i), self.a1) + # Put a mark + self.fileh.mark() + # Unwind all marks sequentially + for i in range(self.niter): + t1 = clock() + for i in range(self.nobjects): + self.fileh.undo() + if verbose: + print("u", end=" ") + if verbose: + print() + undo = clock() - t1 + # Rewind all marks sequentially + t1 = clock() + for i in range(self.nobjects): + self.fileh.redo() + if verbose: + print("r", end=" ") + if verbose: + print() + redo = clock() - t1 + + print("Time for Undo, Redo (createNode):", undo, "s, ", redo, "s") + + def copy_children(self): + """Checking a undo/redo copy_children.""" + + # Create a group + self.fileh.create_group("/", "agroup") + # Create several objects there + for i in range(10): + # Create a new array + self.fileh.create_array("/agroup", "array" + str(i), self.a1) + # Excercise copy_children + for i in range(self.nobjects): + # Create another group for destination + self.fileh.create_group("/", "anothergroup" + str(i)) + # Copy children from /agroup to /anothergroup+i + self.fileh.copy_children("/agroup", "/anothergroup" + str(i)) + # Put a mark + self.fileh.mark() + # Unwind all marks sequentially + for i in range(self.niter): + t1 = clock() + for i in range(self.nobjects): + self.fileh.undo() + if verbose: 
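+                    # one "u" per undone action, as a progress trace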
+ print("u", end=" ") + if verbose: + print() + undo = clock() - t1 + # Rewind all marks sequentially + t1 = clock() + for i in range(self.nobjects): + self.fileh.redo() + if verbose: + print("r", end=" ") + if verbose: + print() + redo = clock() - t1 + + print( + ( + "Time for Undo, Redo (copy_children):", + undo, + "s, ", + redo, + "s", + ) + ) + + def set_attr(self): + """Checking a undo/redo for setting attributes.""" + + # Create a new array + self.fileh.create_array("/", "array", self.a1) + for i in range(self.nobjects): + # Set an attribute + setattr(self.fileh.root.array.attrs, "attr" + str(i), str(self.a1)) + # Put a mark + self.fileh.mark() + # Unwind all marks sequentially + for i in range(self.niter): + t1 = clock() + for i in range(self.nobjects): + self.fileh.undo() + if verbose: + print("u", end=" ") + if verbose: + print() + undo = clock() - t1 + # Rewind all marks sequentially + t1 = clock() + for i in range(self.nobjects): + self.fileh.redo() + if verbose: + print("r", end=" ") + if verbose: + print() + redo = clock() - t1 + + print("Time for Undo, Redo (set_attr):", undo, "s, ", redo, "s") + + def runall(self): + + if testname == "all": + tests = [self.create_node, self.copy_children, self.set_attr] + elif testname == "createNode": + tests = [self.create_node] + elif testname == "copy_children": + tests = [self.copy_children] + elif testname == "set_attr": + tests = [self.set_attr] + for meth in tests: + self.setUp() + meth() + self.tearDown() + + +if __name__ == "__main__": + import sys + import getopt + + usage = ( + """usage: %s [-v] [-p] [-t test] [-s vecsize] [-n niter] datafile + -v verbose (total dump of profiling) + -p do profiling + -t {createNode|copy_children|set_attr|all} run the specified test + -s the size of vectors that are undone/redone + -n number of objects in operations + -i number of iterations for reading\n""" + % sys.argv[0] + ) + + try: + opts, pargs = getopt.getopt(sys.argv[1:], "vpt:s:n:i:") + except Exception: + sys.stderr.write(usage) + sys.exit(0) + + # if we pass too much parameters, abort + if len(pargs) != 1: + sys.stderr.write(usage) + sys.exit(0) + + # default options + verbose = 0 + profile = 0 + testname = "all" + vecsize = 10 + nobjects = 1 + niter = 1 + + # Get the options + for option in opts: + if option[0] == "-v": + verbose = 1 + elif option[0] == "-p": + profile = 1 + elif option[0] == "-t": + testname = option[1] + if testname not in [ + "createNode", + "copy_children", + "set_attr", + "all", + ]: + sys.stderr.write(usage) + sys.exit(0) + elif option[0] == "-s": + vecsize = int(option[1]) + elif option[0] == "-n": + nobjects = int(option[1]) + elif option[0] == "-i": + niter = int(option[1]) + + filename = pargs[0] + + bench = BasicBenchmark(filename, testname, vecsize, nobjects, niter) + if profile: + import hotshot + import hotshot.stats + + prof = hotshot.Profile("do_undo.prof") + prof.runcall(bench.runall) + prof.close() + stats = hotshot.stats.load("do_undo.prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + if verbose: + stats.print_stats() + else: + stats.print_stats(20) + else: + bench.runall() + +# Local Variables: +# mode: python +# End: diff --git a/bench/undo_redo.txt b/bench/undo_redo.txt new file mode 100644 index 0000000..8f0890e --- /dev/null +++ b/bench/undo_redo.txt @@ -0,0 +1,103 @@ +Benchmarks on PyTables Undo/Redo +================================ + +This is a small report for the performance of the Undo/Redo feature in +PyTables. 
+ +A small script (see undo_redo.py) has been made in order to check +different scenarios for Undo/Redo, like creating single nodes, copying +children from one group to another, and creating attributes. + +Undo/Redo is independent of object size +--------------------------------------- + +Firstly, one thing to be noted is that the Undo/Redo feature is +independent of the object size that is being treated. For example, the +times for 10 objects (flag -n) each one with 10 elements (flag -s) is: + +$ time python2.4 undo_redo.py -n 10 -i 2 -s 10 data.nobackup/undo_redo.h5 +Time for Undo, Redo (createNode): 0.213686943054 s, 0.0727670192719 s +Time for Undo, Redo (createNode): 0.271666049957 s, 0.0740389823914 s +Time for Undo, Redo (copy_children): 0.296227931976 s, 0.161941051483 s +Time for Undo, Redo (copy_children): 0.363519906998 s, 0.162662982941 s +Time for Undo, Redo (set_attr): 0.208750009537 s, 0.0732419490814 s +Time for Undo, Redo (set_attr): 0.27628993988 s, 0.0736088752747 s + +real 0m5.557s +user 0m4.354s +sys 0m0.729s + +Note how all tests take more or less the same amount of time. This is +because a move operation is used as a central tool to implement the +Undo/Redo feature. Such a move operation has a constant cost, +independently of the size of the objects. For example, using objects +with 1000 elements, we can see that this does not affect the Undo/Redo +speed: + +$ time python2.4 undo_redo.py -n 10 -i 2 -s 1000 data.nobackup/undo_redo.h5 +Time for Undo, Redo (createNode): 0.213760137558 s, 0.0717759132385 s +Time for Undo, Redo (createNode): 0.276151895523 s, 0.0724079608917 s +Time for Undo, Redo (copy_children): 0.308417797089 s, 0.168260812759 s +Time for Undo, Redo (copy_children): 0.382102966309 s, 0.168042898178 s +Time for Undo, Redo (set_attr): 0.209735155106 s, 0.0740969181061 s +Time for Undo, Redo (set_attr): 0.279798984528 s, 0.0770981311798 s + +real 0m5.835s +user 0m4.585s +sys 0m0.736s + + +Undo/Redo times grow linearly with the number of objects implied +---------------------------------------------------------------- + +Secondly, the time for doing/undoing is obviously proportional +(linearly) to the number of objects that are implied in that process +(set by -n): + +$ time python2.4 undo_redo.py -n 100 -i 2 -s 10 data.nobackup/undo_redo.h5 +Time for Undo, Redo (createNode): 2.27267885208 s, 0.779091119766 s +Time for Undo, Redo (createNode): 2.31264209747 s, 0.766252040863 s +Time for Undo, Redo (copy_children): 3.01871585846 s, 1.63346219063 s +Time for Undo, Redo (copy_children): 3.07704997063 s, 1.62615203857 s +Time for Undo, Redo (set_attr): 2.18017196655 s, 0.809293985367 s +Time for Undo, Redo (set_attr): 2.23039293289 s, 0.809432029724 s + +real 0m48.395s +user 0m40.385s +sys 0m6.914s + + +A note on actual performance and place for improvement +------------------------------------------------------ + +Finally, note how the Undo/Redo capability of PyTables is pretty +fast. The next benchmark makes 1000 undo and 1000 redos for +create_array: + +$ time python2.4 undo_redo.py -n 1000 -i 2 -t createNode -s 1000 data.nobackup/undo_redo.h5 +Time for Undo, Redo (createNode): 22.7840828896 s, 7.9872610569 s +Time for Undo, Redo (createNode): 22.2799329758 s, 7.95833396912 s + +real 1m32.307s +user 1m16.598s +sys 0m15.105s + +i.e. an undo takes 23 milliseconds while a redo takes 8 milliseconds +approximately. + +The fact that undo operations take 3 times more than redo is probably +due to how the action log is implemented. 
The action log has been +implemented as a Table object, and PyTables has been optimized to read +rows of tables in *forward* direction (the one needed for redo +operations). However, when looking in *backward* direction (needed for +undo operations), the internal cache of PyTables is counterproductive +and makes look-ups quite slow (compared with forward access). +Nevertheless, the code for Undo/Redo has been optimized quite a bit to +smooth this kind of access as much as possible, but with a relative +success. A more definitive optimization should involve getting much +better performance for reading tables in backward direction. That +would be a major task, and can be eventually addressed in the future. + + +Francesc Alted +2005-03-10 diff --git a/bench/widetree.py b/bench/widetree.py new file mode 100644 index 0000000..327f5a7 --- /dev/null +++ b/bench/widetree.py @@ -0,0 +1,138 @@ +import tempfile +import unittest +from time import perf_counter as clock +from pathlib import Path + +import hotshot +import hotshot.stats + +import tables as tb + +verbose = 0 + + +class WideTreeTestCase(unittest.TestCase): + """Checks for maximum number of childs for a Group.""" + + def test00_leafs(self): + """Checking creation of large number of leafs (1024) per group. + + Variable 'maxchilds' controls this check. PyTables support up to + 4096 childs per group, but this would take too much memory (up + to 64 MB) for testing purposes (may be we can add a test for big + platforms). A 1024 childs run takes up to 30 MB. A 512 childs + test takes around 25 MB. + + """ + + maxchilds = 1000 + if verbose: + print("\n", "-=" * 30) + print("Running %s.test00_wideTree..." % self.__class__.__name__) + print("Maximum number of childs tested :", maxchilds) + # Open a new empty HDF5 file + # file = tempfile.mktemp(".h5") + file = "test_widetree.h5" + + fileh = tb.open_file(file, mode="w") + if verbose: + print("Children writing progress: ", end=" ") + for child in range(maxchilds): + if verbose: + print("%3d," % (child), end=" ") + a = [1, 1] + fileh.create_group( + fileh.root, "group" + str(child), "child: %d" % child + ) + fileh.create_array( + "/group" + str(child), + "array" + str(child), + a, + "child: %d" % child, + ) + if verbose: + print() + # Close the file + fileh.close() + + t1 = clock() + # Open the previous HDF5 file in read-only mode + fileh = tb.open_file(file, mode="r") + print( + "\nTime spent opening a file with %d groups + %d arrays: " + "%s s" % (maxchilds, maxchilds, clock() - t1) + ) + if verbose: + print("\nChildren reading progress: ", end=" ") + # Close the file + fileh.close() + # Then, delete the file + # os.remove(file) + + def test01_wide_tree(self): + """Checking creation of large number of groups (1024) per group. + + Variable 'maxchilds' controls this check. PyTables support up to + 4096 childs per group, but this would take too much memory (up + to 64 MB) for testing purposes (may be we can add a test for big + platforms). A 1024 childs run takes up to 30 MB. A 512 childs + test takes around 25 MB. + + """ + + maxchilds = 1000 + if verbose: + print("\n", "-=" * 30) + print("Running %s.test00_wideTree..." 
% self.__class__.__name__) + print("Maximum number of childs tested :", maxchilds) + # Open a new empty HDF5 file + file = tempfile.mktemp(".h5") + # file = "test_widetree.h5" + + fileh = tb.open_file(file, mode="w") + if verbose: + print("Children writing progress: ", end=" ") + for child in range(maxchilds): + if verbose: + print("%3d," % (child), end=" ") + fileh.create_group( + fileh.root, "group" + str(child), "child: %d" % child + ) + if verbose: + print() + # Close the file + fileh.close() + + t1 = clock() + # Open the previous HDF5 file in read-only mode + fileh = tb.open_file(file, mode="r") + print( + "\nTime spent opening a file with %d groups: %s s" + % (maxchilds, clock() - t1) + ) + # Close the file + fileh.close() + # Then, delete the file + Path(file).unlink() + + +# ---------------------------------------------------------------------- + + +def suite(): + suite_ = unittest.TestSuite() + from tables.tests.common import make_suite + + suite_.addTest(make_suite(WideTreeTestCase)) + + return suite_ + + +if __name__ == "__main__": + prof = hotshot.Profile("widetree.prof") + benchtime, stones = prof.runcall(unittest.main(defaultTest="suite")) + prof.close() + stats = hotshot.stats.load("widetree.prof") + stats.strip_dirs() + stats.sort_stats("time", "calls") + stats.print_stats(20) diff --git a/bench/widetree2.py b/bench/widetree2.py new file mode 100644 index 0000000..4a4dda0 --- /dev/null +++ b/bench/widetree2.py @@ -0,0 +1,122 @@ +import unittest + +import tables as tb + +verbose = 0 + + +class Test(tb.IsDescription): + ngroup = tb.Int32Col(pos=1) + ntable = tb.Int32Col(pos=2) + nrow = tb.Int32Col(pos=3) + # string = StringCol(itemsize=500, pos=4) + + +class WideTreeTestCase(unittest.TestCase): + + def test00_leafs(self): + + # Open a new empty HDF5 file + filename = "test_widetree.h5" + ngroups = 10 + ntables = 300 + nrows = 10 + complevel = 0 + complib = "lzo" + + print("Writing...") + # Open a file in "w"rite mode + fileh = tb.open_file(filename, mode="w", title="PyTables Stress Test") + + for k in range(ngroups): + # Create the group + group = fileh.create_group("/", "group%04d" % k, "Group %d" % k) + + fileh.close() + + # Now, create the tables + rowswritten = 0 + for k in range(ngroups): + print("Filling tables in group:", k) + fileh = tb.open_file(filename, mode="a", root_uep="group%04d" % k) + # Get the group + group = fileh.root + for j in range(ntables): + # Create a table + table = fileh.create_table( + group, + "table%04d" % j, + Test, + "Table%04d" % j, + tb.Filters(complevel, complib), + nrows, + ) + # Get the row object associated with the new table + row = table.row + # Fill the table + for i in range(nrows): + row["ngroup"] = k + row["ntable"] = j + row["nrow"] = i + row.append() + + rowswritten += nrows + table.flush() + + # Close the file + fileh.close() + + # read the file + print("Reading...") + rowsread = 0 + for ngroup in range(ngroups): + fileh = tb.open_file( + filename, mode="r", root_uep="group%04d" % ngroup + ) + # Get the group + group = fileh.root + ntable = 0 + if verbose: + print("Group ==>", group) + for table in fileh.list_nodes(group, "Table"): + if verbose > 1: + print("Table ==>", table) + print("Max rows in buf:", table.nrowsinbuf) + print("Rows in", table._v_pathname, ":", table.nrows) + print("Buffersize:", table.rowsize * table.nrowsinbuf) + print("MaxTuples:", table.nrowsinbuf) + + nrow = 0 + for row in table: + try: + assert row["ngroup"] == ngroup + assert row["ntable"] == ntable + assert row["nrow"] == nrow + except Exception: + 
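+                        # report the mismatch but keep scanning the rows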
print( + "Error in group: %d, table: %d, row: %d" + % (ngroup, ntable, nrow) + ) + print("Record ==>", row) + nrow += 1 + + assert nrow == table.nrows + rowsread += table.nrows + ntable += 1 + + # Close the file (eventually destroy the extended type) + fileh.close() + + +# ---------------------------------------------------------------------- +def suite(): + suite_ = unittest.TestSuite() + from tables.tests.common import make_suite + + suite_.addTest(make_suite(WideTreeTestCase)) + + return suite_ + + +if __name__ == "__main__": + unittest.main(defaultTest="suite") diff --git a/bench/woody-pentiumIV.txt b/bench/woody-pentiumIV.txt new file mode 100644 index 0000000..b7a09bc --- /dev/null +++ b/bench/woody-pentiumIV.txt @@ -0,0 +1,188 @@ +This is for Debian woody! + +Below are some benchmarking figures obtained while reading and writing +to a file with three tables, each table containing 10000 records. For +reference, the same tests have been repeated using the shelve module +that comes with Python. The tests were conducted on a platform with a +2 GHz AMD Athlon chip, an IDE disk at 4600 rpm, and 256 MB of RAM. + +Version 0.2 + + | medium size records | small size records + | (47 Bytes) | (16 Bytes) + +---------------------------+------------------------------ + | rows/s filesize | rows/s filesize + | write read | write read +------------+---------------------------+------------------------------ + no compress| | + record | 24400 39000 1184 KB | 32600 52600 506 KB + tupla | 17100 81100 1184 KB | 66666 107142 506 KB +------------+---------------------------+------------------------------ + compress | | + record | 22200 37500 494 KB | 31900 51700 94 KB + tupla | 16100 75000 494 KB | 63900 107142 94 KB +------------+---------------------------+------------------------------ + Shelve | 25800 14400 2500 KB | 68200 17000 921 KB + +New version (15-Jan-2003) + + + PyTables pre-0.3 + +Rec length | rows/s | KB/s | rows | filesz | memory | + | write read | write read | | (MB) | (MB) | +------------+-----------------+-----------------+-------+--------+--------+ + 16 B | 31000 166600 | 480 2600 | 3.e4 | 0.49| 6.5 | +------------+-----------------+-----------------+-------+--------+--------+ + 56 B | 17300 136000 | 942 7460 | 3.e4 | 1.7 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 56 B* | 1560 136000 | 85 7560 | 3.e4 | 1.7 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 64 B* | 1540 130000 | 96 8152 | 3.e4 | 1.9 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B* | 879 81100 | 472 43500 | 3.e4 | 19 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B** | 12000 103000 | 6440 55400 | 3.e5 | 168 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B** | 15500 81100 | 8350 43500 | 3.e4 | 19 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B**c| 909 1100 | 490 1081 | 3.e4 | 0.76| 8.5 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B***| 3600 81100 | 1950 43500 | 3.e4 | 19 | 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + +* These are figures obtained with a numarray as part of the record +** The numarray record fields are not set in each iteration +*** Some numarray elements of a record field are changed on each iteration +**c Like ** but with compression (level 1) + + +New version (10-March-2003) 
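+
+(In the tables below, the %CP columns give the CPU usage, in percent,
+while (w)riting and (r)eading.)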
+ + PyTables pre-0.4 + +Rec | rows/s | KB/s | rows | filesz | memory |%CP|%CP +length | write read | write read | | (MB) | (MB) |(w)|(r) +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |434000 469000 | 6800 7300 | 3.e4 | 0.49| 6.5 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 Bc |326000 435000 | 5100 6800 | 3.e4 | 0.12| 6.5 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |663000 728000 | 10400 11400 | 3.e5 | 4.7 | 7.0 | 99|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |679000 797000 | 10600 12500 | 3.e6 | 46.0 | 10.0 | 98| 98 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 Bc |452000 663000 | 7100 10400 | 3.e6 | 9.3 | 10.0 | 98| 98 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B |576000 590000 | 9000 9200 | 3.e7 | 458.0 | 11.0 | 78| 76 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 3050 380000 | 163 20700 | 3.e4 | 1.7 | 7.2 | 98|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |194000 340000 | 10600 18600 | 3.e4 | 1.7 | 7.2 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B*c |142000 306000 | 7800 16600 | 3.e4 | 0.3 | 7.2 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |273600 589000 | 14800 32214 | 3.e5 | 16.0 | 9.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B*c |184000 425000 | 10070 23362 | 3.e5 | 2.7 | 9.7 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |203600 649000 | 11100 35500 | 3.e6 | 161.0 | 12.0 | 72| 99 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B* |184000 229000 | 10000 12500 | 1.e7 | 534.0 | 17.0 | 56| 40 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B*np|184000 229000 | 10000 12500 | 1.e7 | 534.0 | 17.0 | 56| 40 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B | 2230 143000 | 1195 76600 | 3.e4 | 19 | 9.4 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B* | 76000 250000 | 40900 134000 | 3.e4 | 19 | 9.4 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B*c | 13900 30000 | 7400 16100 | 3.e4 | 0.7 | 10.0 | 99| 99 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B* | 25400 325000 | 13600 174000 | 3.e5 | 167 | 11.0 | 71| 96 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B* | 18700 28000 | 10000 15100 | 6.e5 | 322 | 13.0 | 76| 9 +--------+-----------------+-----------------+-------+--------+--------+---+---- +550 B*c | 7300 21000 | 3900 11300 | 6.e5 | 11 | 17.0 | 98| 99 +--------+-----------------+-----------------+-------+--------+--------+---+---- + +* These are figures obtained with a numarray as part of the record +** The numarray record fields are not set in each iteration +c With compression (level 1) +np No psyco optimizations + + Shelve + +Rec length | rows/s | KB/s | rows | filesz | memory | + | write read | write read | | (MB) | (MB) | +------------+-----------------+-----------------+-------+--------+--------+ + 16 B | 68200 17000 | 1070 266 | 3.e4 
| 0.94| 7.2 | +------------+-----------------+-----------------+-------+--------+--------+ + 56 B | 25000 14400 | 1367 784 | 3.e4 | 2.5 | 10.6 | +------------+-----------------+-----------------+-------+--------+--------+ + 56 B* | 2980 2710 | 162 148 | 3.e4 | 7.3 | 33 | +------------+-----------------+-----------------+-------+--------+--------+ + 64 B* | 2900 2700 | 182 168 | 3.e4 | 7.5 | 33 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B* | 1090 1310 | 590 710 | 3.e4 | 58 | 122 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B** | 16000 14900 | 2400 1200 | 3.e4 | 2.4 | 10.6 | +------------+-----------------+-----------------+-------+--------+--------+ + 550 B***| 28000 11900 | 2400 1100 | 3.e4 | 2.5 | 10.6 | +------------+-----------------+-----------------+-------+--------+--------+ + +* These are figures obtained with a numarray as part of the record +** The nuamrray records are not set on each iteration +*** Some numarray elements of a record field are changed on each iteration + + + Python cPickle & bsddb3 RECNO with variable length + +Rec | Krows/s | MB/s | Krows | filesz | memory |%CP|%CP +length | write read | write read | | (MB) | (MB) |(w)|(r) +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 23.0 4.3 | 0.65 0.12 | 30 | 2.3 | 6.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 22.0 4.3 | 0.60 0.12 | 300 | 24 | 25.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 12.3 2.0 | 0.68 0.11 | 30 | 5.8 | 6.2 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 8.8 2.0 | 0.44 0.11 | 300 | 61 | 6.2 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + + + Python struct & bsddb3 RECNO with fixed length + +Rec | Krows/s | MB/s | Krows | filesz | memory |%CP|%CP +length | write read | write read | | (MB) | (MB) |(w)|(r) +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 61 71 | 1.6 1.9 | 30 | 1.0 | 5.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 56 65 | 1.5 1.8 | 300 | 10 | 5.8 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 51 61 | 1.4 1.6 | 3000 | 100 | 6.1 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 51 52 | 2.7 2.8 | 30 | 1.8 | 5.8 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 18 50 | 1.0 2.7 | 300 | 18 | 6.2 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 16 48 | 0.9 2.6 | 1000 | 61 | 6.5 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + + + PySqlLite + +Rec | rows/s | KB/s | rows | filesz | memory |%CP|%CP +length | write read | write read | | (MB) | (MB) |(w)|(r) +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 4290 1400000 | 200 48000 | 3.e4 | 1.4 | 5.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 3660 1030000 | 182 51000 | 3.e5 | 15 | 5.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 16 B | 3580 230000 | 192 12380 | 6.e6 | 322 | 5.0 |100| 25 
+--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 2990 882000 | 250 76000 | 3.e4 | 2.6 | 5.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 2900 857000 | 270 80000 | 3.e5 | 28 | 5.0 |100|100 +--------+-----------------+-----------------+-------+--------+--------+---+---- + 56 B | 2900 120000 | 302 13100 | 3.e6 | 314 | 5.0 |100| 11 +--------+-----------------+-----------------+-------+--------+--------+---+---- diff --git a/c-blosc/blosc/bitshuffle-avx2.c b/c-blosc/blosc/bitshuffle-avx2.c new file mode 100644 index 0000000..6388825 --- /dev/null +++ b/c-blosc/blosc/bitshuffle-avx2.c @@ -0,0 +1,258 @@ +/* + * Bitshuffle - Filter for improving compression of typed binary data. + * + * Author: Kiyoshi Masui + * Website: https://github.com/kiyo-masui/bitshuffle + * Created: 2014 + * + * Note: Adapted for c-blosc by Francesc Alted. + * + * See LICENSES/BITSHUFFLE.txt file for details about copyright and + * rights to use. + * + */ + +#include "bitshuffle-generic.h" +#include "bitshuffle-sse2.h" +#include "bitshuffle-avx2.h" + + +/* Define dummy functions if AVX2 is not available for the compilation target and compiler. */ +#if !defined(__AVX2__) +#include + +int64_t blosc_internal_bshuf_trans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + abort(); +} + +int64_t blosc_internal_bshuf_untrans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + abort(); +} + +#else /* defined(__AVX2__) */ + +#include + +/* The next is useful for debugging purposes */ +#if 0 +#include +#include + +static void printymm(__m256i ymm0) +{ + uint8_t buf[32]; + + ((__m256i *)buf)[0] = ymm0; + printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n", + buf[0], buf[1], buf[2], buf[3], + buf[4], buf[5], buf[6], buf[7], + buf[8], buf[9], buf[10], buf[11], + buf[12], buf[13], buf[14], buf[15], + buf[16], buf[17], buf[18], buf[19], + buf[20], buf[21], buf[22], buf[23], + buf[24], buf[25], buf[26], buf[27], + buf[28], buf[29], buf[30], buf[31]); +} +#endif + + +/* ---- Code that requires AVX2. Intel Haswell (2013) and later. ---- */ + + +/* Transpose bits within bytes. */ +static int64_t bshuf_trans_bit_byte_avx2(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + int32_t* out_i32; + + size_t nbyte = elem_size * size; + + int64_t count; + + __m256i ymm; + int32_t bt; + size_t ii, kk; + + for (ii = 0; ii + 31 < nbyte; ii += 32) { + ymm = _mm256_loadu_si256((__m256i *) &in_b[ii]); + for (kk = 0; kk < 8; kk++) { + bt = _mm256_movemask_epi8(ymm); + ymm = _mm256_slli_epi16(ymm, 1); + out_i32 = (int32_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_i32 = bt; + } + } + count = blosc_internal_bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 32); + return count; +} + +/* Transpose bits within elements. 
*/ +int64_t blosc_internal_bshuf_trans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = blosc_internal_bshuf_trans_byte_elem_sse2(in, out, size, elem_size, tmp_buf); + CHECK_ERR(count); + count = bshuf_trans_bit_byte_avx2(out, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = blosc_internal_bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + return count; +} + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. */ +static int64_t bshuf_trans_byte_bitrow_avx2(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + size_t ii, jj, kk, hh, mm; + + CHECK_MULT_EIGHT(size); + + if (elem_size % 4) + return blosc_internal_bshuf_trans_byte_bitrow_sse2(in, out, size, elem_size); + + __m256i ymm_0[8]; + __m256i ymm_1[8]; + __m256i ymm_storeage[8][4]; + + for (jj = 0; jj + 31 < nbyte_row; jj += 32) { + for (ii = 0; ii + 3 < elem_size; ii += 4) { + for (hh = 0; hh < 4; hh ++) { + + for (kk = 0; kk < 8; kk ++){ + ymm_0[kk] = _mm256_loadu_si256((__m256i *) &in_b[ + (ii * 8 + hh * 8 + kk) * nbyte_row + jj]); + } + + for (kk = 0; kk < 4; kk ++){ + ymm_1[kk] = _mm256_unpacklo_epi8(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + ymm_1[kk + 4] = _mm256_unpackhi_epi8(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + } + + for (kk = 0; kk < 2; kk ++){ + for (mm = 0; mm < 2; mm ++){ + ymm_0[kk * 4 + mm] = _mm256_unpacklo_epi16( + ymm_1[kk * 4 + mm * 2], + ymm_1[kk * 4 + mm * 2 + 1]); + ymm_0[kk * 4 + mm + 2] = _mm256_unpackhi_epi16( + ymm_1[kk * 4 + mm * 2], + ymm_1[kk * 4 + mm * 2 + 1]); + } + } + + for (kk = 0; kk < 4; kk ++){ + ymm_1[kk * 2] = _mm256_unpacklo_epi32(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + ymm_1[kk * 2 + 1] = _mm256_unpackhi_epi32(ymm_0[kk * 2], + ymm_0[kk * 2 + 1]); + } + + for (kk = 0; kk < 8; kk ++){ + ymm_storeage[kk][hh] = ymm_1[kk]; + } + } + + for (mm = 0; mm < 8; mm ++) { + + for (kk = 0; kk < 4; kk ++){ + ymm_0[kk] = ymm_storeage[mm][kk]; + } + + ymm_1[0] = _mm256_unpacklo_epi64(ymm_0[0], ymm_0[1]); + ymm_1[1] = _mm256_unpacklo_epi64(ymm_0[2], ymm_0[3]); + ymm_1[2] = _mm256_unpackhi_epi64(ymm_0[0], ymm_0[1]); + ymm_1[3] = _mm256_unpackhi_epi64(ymm_0[2], ymm_0[3]); + + ymm_0[0] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 32); + ymm_0[1] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 32); + ymm_0[2] = _mm256_permute2x128_si256(ymm_1[0], ymm_1[1], 49); + ymm_0[3] = _mm256_permute2x128_si256(ymm_1[2], ymm_1[3], 49); + + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 0 * 16) * nrows + ii * 8], ymm_0[0]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 0 * 16 + 1) * nrows + ii * 8], ymm_0[1]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 1 * 16) * nrows + ii * 8], ymm_0[2]); + _mm256_storeu_si256((__m256i *) &out_b[ + (jj + mm * 2 + 1 * 16 + 1) * nrows + ii * 8], ymm_0[3]); + } + } + } + for (ii = 0; ii < nrows; ii ++ ) { + for (jj = nbyte_row - nbyte_row % 32; jj < nbyte_row; jj ++) { + out_b[jj * nrows + ii] = in_b[ii * nbyte_row + jj]; + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. 
*/ +static int64_t bshuf_shuffle_bit_eightelem_avx2(void* in, void* out, const size_t size, + const size_t elem_size) { + + CHECK_MULT_EIGHT(size); + + /* With a bit of care, this could be written such that such that it is */ + /* in_buf = out_buf safe. */ + char* in_b = (char*) in; + char* out_b = (char*) out; + + size_t nbyte = elem_size * size; + size_t ii, jj, kk, ind; + + __m256i ymm; + int32_t bt; + + if (elem_size % 4) { + return blosc_internal_bshuf_shuffle_bit_eightelem_sse2(in, out, size, elem_size); + } else { + for (jj = 0; jj + 31 < 8 * elem_size; jj += 32) { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; + ii += 8 * elem_size) { + ymm = _mm256_loadu_si256((__m256i *) &in_b[ii + jj]); + for (kk = 0; kk < 8; kk++) { + bt = _mm256_movemask_epi8(ymm); + ymm = _mm256_slli_epi16(ymm, 1); + ind = (ii + jj / 8 + (7 - kk) * elem_size); + * (int32_t *) &out_b[ind] = bt; + } + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. */ +int64_t blosc_internal_bshuf_untrans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = bshuf_trans_byte_bitrow_avx2(in, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = bshuf_shuffle_bit_eightelem_avx2(tmp_buf, out, size, elem_size); + + return count; +} + +#endif /* !defined(__AVX2__) */ diff --git a/c-blosc/blosc/bitshuffle-avx2.h b/c-blosc/blosc/bitshuffle-avx2.h new file mode 100644 index 0000000..0c80212 --- /dev/null +++ b/c-blosc/blosc/bitshuffle-avx2.h @@ -0,0 +1,38 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* AVX2-accelerated shuffle/unshuffle routines. */ + +#ifndef BITSHUFFLE_AVX2_H +#define BITSHUFFLE_AVX2_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + AVX2-accelerated bitshuffle routine. +*/ +BLOSC_NO_EXPORT int64_t +blosc_internal_bshuf_trans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf); + +/** + AVX2-accelerated bitunshuffle routine. +*/ +BLOSC_NO_EXPORT int64_t +blosc_internal_bshuf_untrans_bit_elem_avx2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf); + +#ifdef __cplusplus +} +#endif + +#endif /* BITSHUFFLE_AVX2_H */ diff --git a/c-blosc/blosc/bitshuffle-generic.c b/c-blosc/blosc/bitshuffle-generic.c new file mode 100644 index 0000000..6d6fd95 --- /dev/null +++ b/c-blosc/blosc/bitshuffle-generic.c @@ -0,0 +1,220 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "bitshuffle-generic.h" + + +/* Transpose bytes within elements, starting partway through input. */ +int64_t blosc_internal_bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size, + const size_t elem_size, const size_t start) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + size_t ii, jj, kk; + + CHECK_MULT_EIGHT(start); + + if (size > start) { + /* ii loop separated into 2 loops so the compiler can unroll */ + /* the inner one. 
*/ + for (ii = start; ii + 7 < size; ii += 8) { + for (jj = 0; jj < elem_size; jj++) { + for (kk = 0; kk < 8; kk++) { + out_b[jj * size + ii + kk] + = in_b[ii * elem_size + kk * elem_size + jj]; + } + } + } + for (ii = size - size % 8; ii < size; ii ++) { + for (jj = 0; jj < elem_size; jj++) { + out_b[jj * size + ii] = in_b[ii * elem_size + jj]; + } + } + } + return size * elem_size; +} + + +/* Transpose bytes within elements. */ +int64_t blosc_internal_bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + return blosc_internal_bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0); +} + + +/* Transpose bits within bytes. */ +int64_t blosc_internal_bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size, + const size_t elem_size, const size_t start_byte) { + + const uint64_t* in_b = (const uint64_t*) in; + uint8_t* out_b = (uint8_t*) out; + + uint64_t x, t; + + size_t ii, kk; + size_t nbyte = elem_size * size; + size_t nbyte_bitrow = nbyte / 8; + + uint64_t e=1; + const int little_endian = *(uint8_t *) &e == 1; + const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow; + const int64_t bit_row_offset = little_endian ? 0 : 7 * nbyte_bitrow; + + CHECK_MULT_EIGHT(nbyte); + CHECK_MULT_EIGHT(start_byte); + + for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) { + x = in_b[ii]; + if (little_endian) { + TRANS_BIT_8X8(x, t); + } else { + TRANS_BIT_8X8_BE(x, t); + } + for (kk = 0; kk < 8; kk ++) { + out_b[bit_row_offset + kk * bit_row_skip + ii] = x; + x = x >> 8; + } + } + return size * elem_size; +} + + +/* Transpose bits within bytes. */ +static int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + + return blosc_internal_bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0); +} + +/* General transpose of an array, optimized for large element sizes. */ +int64_t blosc_internal_bshuf_trans_elem(const void* in, void* out, const size_t lda, + const size_t ldb, const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + size_t ii, jj; + for (ii = 0; ii < lda; ii++) { + for (jj = 0; jj < ldb; jj++) { + memcpy(&out_b[(jj*lda + ii) * elem_size], + &in_b[(ii*ldb + jj) * elem_size], elem_size); + } + } + return lda * ldb * elem_size; +} + + +/* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */ +int64_t blosc_internal_bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size, + const size_t elem_size) { + + size_t nbyte_bitrow = size / 8; + + CHECK_MULT_EIGHT(size); + + return blosc_internal_bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow); +} + + +/* Transpose bits within elements. */ +int64_t blosc_internal_bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = blosc_internal_bshuf_trans_byte_elem_scal(in, out, size, elem_size); + CHECK_ERR(count); + count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = blosc_internal_bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. 
*/ +static int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size, + const size_t elem_size) { + char* in_b = (char*) in; + char* out_b = (char*) out; + + size_t nbyte_row = size / 8; + size_t ii, jj, kk; + + CHECK_MULT_EIGHT(size); + + for (jj = 0; jj < elem_size; jj++) { + for (ii = 0; ii < nbyte_row; ii++) { + for (kk = 0; kk < 8; kk++) { + out_b[ii * 8 * elem_size + jj * 8 + kk] = \ + in_b[(jj * 8 + kk) * nbyte_row + ii]; + } + } + } + return size * elem_size; +} + + +/* Shuffle bits within the bytes of eight element blocks. */ +int64_t blosc_internal_bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, + const size_t size, const size_t elem_size) { + + const char *in_b; + char *out_b; + uint64_t x, t; + size_t ii, jj, kk; + size_t nbyte, out_index; + + uint64_t e=1; + const int little_endian = *(uint8_t *) &e == 1; + const size_t elem_skip = little_endian ? elem_size : -elem_size; + const uint64_t elem_offset = little_endian ? 0 : 7 * elem_size; + + CHECK_MULT_EIGHT(size); + + in_b = (const char*) in; + out_b = (char*) out; + + nbyte = elem_size * size; + + for (jj = 0; jj < 8 * elem_size; jj += 8) { + for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) { + x = *((uint64_t*) &in_b[ii + jj]); + if (little_endian) { + TRANS_BIT_8X8(x, t); + } else { + TRANS_BIT_8X8_BE(x, t); + } + for (kk = 0; kk < 8; kk++) { + out_index = ii + jj / 8 + elem_offset + kk * elem_skip; + *((uint8_t*) &out_b[out_index]) = x; + x = x >> 8; + } + } + } + return size * elem_size; +} + + +/* Untranspose bits within elements. */ +int64_t blosc_internal_bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = blosc_internal_bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size); + + return count; +} diff --git a/c-blosc/blosc/bitshuffle-generic.h b/c-blosc/blosc/bitshuffle-generic.h new file mode 100644 index 0000000..4f28194 --- /dev/null +++ b/c-blosc/blosc/bitshuffle-generic.h @@ -0,0 +1,161 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* Generic (non-hardware-accelerated) shuffle/unshuffle routines. + These are used when hardware-accelerated functions aren't available + for a particular platform; they are also used by the hardware- + accelerated functions to handle any remaining elements in a block + which isn't a multiple of the hardware's vector size. */ + +#ifndef BITSHUFFLE_GENERIC_H +#define BITSHUFFLE_GENERIC_H + +#include "blosc-common.h" +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Macros. */ +#define CHECK_MULT_EIGHT(n) if (n % 8) return -80; +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define MAX(X,Y) ((X) > (Y) ? (X) : (Y)) +#define CHECK_ERR(count) if (count < 0) { return count; } + + +/* ---- Worker code not requiring special instruction sets. ---- + * + * The following code does not use any x86 specific vectorized instructions + * and should compile on any machine + * + */ + +/* Transpose 8x8 bit array packed into a single quadword *x*. + * *t* is workspace. 
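+ * (Editorial note: this is the classic masked-XOR 8x8 bit transpose.
+ * Each round swaps blocks across the diagonal with the idiom
+ *   t = (x ^ (x >> d)) & mask;  x = x ^ t ^ (t << d);
+ * which exchanges the masked bits of x with the bits d places above
+ * them: first single bits at distance 7, then 2x2 blocks at distance
+ * 14, then 4x4 blocks at distance 28, transposing the whole matrix in
+ * three rounds, 18 operations total.)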
*/
+#define TRANS_BIT_8X8(x, t) {                                 \
+    t = (x ^ (x >> 7)) & 0x00AA00AA00AA00AALL;                \
+    x = x ^ t ^ (t << 7);                                     \
+    t = (x ^ (x >> 14)) & 0x0000CCCC0000CCCCLL;               \
+    x = x ^ t ^ (t << 14);                                    \
+    t = (x ^ (x >> 28)) & 0x00000000F0F0F0F0LL;               \
+    x = x ^ t ^ (t << 28);                                    \
+  }
+
+/* Transpose 8x8 bit array along the diagonal from upper right
+   to lower left */
+#define TRANS_BIT_8X8_BE(x, t) {                              \
+    t = (x ^ (x >> 9)) & 0x0055005500550055LL;                \
+    x = x ^ t ^ (t << 9);                                     \
+    t = (x ^ (x >> 18)) & 0x0000333300003333LL;               \
+    x = x ^ t ^ (t << 18);                                    \
+    t = (x ^ (x >> 36)) & 0x000000000F0F0F0FLL;               \
+    x = x ^ t ^ (t << 36);                                    \
+  }
+
+/* Transpose of an array of arbitrarily typed elements. */
+#define TRANS_ELEM_TYPE(in, out, lda, ldb, type_t) {          \
+    type_t* in_type = (type_t*) in;                           \
+    type_t* out_type = (type_t*) out;                         \
+    size_t ii, jj, kk;                                        \
+    for (ii = 0; ii + 7 < lda; ii += 8) {                     \
+      for (jj = 0; jj < ldb; jj++) {                          \
+        for (kk = 0; kk < 8; kk++) {                          \
+          out_type[jj*lda + ii + kk] =                        \
+              in_type[ii*ldb + kk * ldb + jj];                \
+        }                                                     \
+      }                                                       \
+    }                                                         \
+    for (ii = lda - lda % 8; ii < lda; ii ++) {               \
+      for (jj = 0; jj < ldb; jj++) {                          \
+        out_type[jj*lda + ii] = in_type[ii*ldb + jj];         \
+      }                                                       \
+    }                                                         \
+  }
+
+
+/* Private functions */
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size,
+                                               const size_t elem_size, const size_t start);
+
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size,
+                                          const size_t elem_size);
+
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size,
+                                              const size_t elem_size, const size_t start_byte);
+
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_trans_elem(const void* in, void* out, const size_t lda,
+                                const size_t ldb, const size_t elem_size);
+
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size,
+                                        const size_t elem_size);
+
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_shuffle_bit_eightelem_scal(const void* in, void* out,
+                                                const size_t size, const size_t elem_size);
+
+
+/* Bitshuffle the data.
+ *
+ * Transpose the bits within elements.
+ *
+ * Parameters
+ * ----------
+ * in : input buffer, must be of size * elem_size bytes
+ * out : output buffer, must be of size * elem_size bytes
+ * size : number of elements in input
+ * elem_size : element size of typed data
+ * tmp_buffer : temporary buffer with the same `size` as `in` and `out`
+ *
+ * Returns
+ * -------
+ * count : the number of bytes processed (size * elem_size), or a
+ *     negative error code if size is not a multiple of 8
+ *
+ */
+
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size,
+                                         const size_t elem_size, void* tmp_buf);
+
+/* Unshuffle bitshuffled data.
+ *
+ * Untranspose the bits within elements.
+ *
+ * To properly unshuffle bitshuffled data, *size* and *elem_size* must
+ * match the parameters used to shuffle the data.
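+ * (Editorial sketch of a round trip, assuming a scratch buffer of the
+ * same size as the data and a hypothetical handle_error helper:
+ *   int64_t n;
+ *   n = blosc_internal_bshuf_trans_bit_elem_scal(src, packed, size,
+ *                                                elem_size, tmp);
+ *   if (n < 0) handle_error(n);
+ *   n = blosc_internal_bshuf_untrans_bit_elem_scal(packed, restored,
+ *                                                  size, elem_size, tmp);
+ *   if (n < 0) handle_error(n);
+ * after which restored should compare equal to src byte for byte.)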
+ *
+ * Parameters
+ * ----------
+ * in : input buffer, must be of size * elem_size bytes
+ * out : output buffer, must be of size * elem_size bytes
+ * size : number of elements in input
+ * elem_size : element size of typed data
+ * tmp_buffer : temporary buffer with the same `size` as `in` and `out`
+ *
+ * Returns
+ * -------
+ * count : the number of bytes processed (size * elem_size), or a
+ *     negative error code if size is not a multiple of 8
+ *
+ */
+
+BLOSC_NO_EXPORT int64_t
+blosc_internal_bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size,
+                                           const size_t elem_size, void* tmp_buf);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* BITSHUFFLE_GENERIC_H */
diff --git a/c-blosc/blosc/bitshuffle-sse2.c b/c-blosc/blosc/bitshuffle-sse2.c
new file mode 100644
index 0000000..d38dddb
--- /dev/null
+++ b/c-blosc/blosc/bitshuffle-sse2.c
@@ -0,0 +1,480 @@
+/*
+ * Bitshuffle - Filter for improving compression of typed binary data.
+ *
+ * Author: Kiyoshi Masui
+ * Website: http://www.github.com/kiyo-masui/bitshuffle
+ * Created: 2014
+ *
+ * Note: Adapted for c-blosc by Francesc Alted.
+ *
+ * See LICENSES/BITSHUFFLE.txt file for details about copyright and
+ * rights to use.
+ *
+ */
+
+#include "bitshuffle-generic.h"
+#include "bitshuffle-sse2.h"
+
+/* Define dummy functions if SSE2 is not available for the compilation target and compiler. */
+#if !defined(__SSE2__)
+#include <stdlib.h>
+
+int64_t blosc_internal_bshuf_trans_byte_elem_sse2(void* in, void* out, const size_t size,
+                                                  const size_t elem_size, void* tmp_buf) {
+  abort();
+}
+
+int64_t blosc_internal_bshuf_untrans_bit_elem_sse2(void* in, void* out, const size_t size,
+                                                   const size_t elem_size, void* tmp_buf) {
+  abort();
+}
+
+#else /* defined(__SSE2__) */
+
+#include <emmintrin.h>
+
+/* The next is useful for debugging purposes */
+#if 0
+#include <stdio.h>
+#include <string.h>
+
+
+static void printxmm(__m128i xmm0)
+{
+  uint8_t buf[32];
+
+  ((__m128i *)buf)[0] = xmm0;
+  printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n",
+         buf[0], buf[1], buf[2], buf[3],
+         buf[4], buf[5], buf[6], buf[7],
+         buf[8], buf[9], buf[10], buf[11],
+         buf[12], buf[13], buf[14], buf[15]);
+}
+#endif
+
+
+/* ---- Worker code that requires SSE2. Intel Pentium 4 (2000) and later. ---- */
+
+/* Transpose bytes within elements for 16 bit elements. */
+static int64_t bshuf_trans_byte_elem_SSE_16(void* in, void* out, const size_t size) {
+
+  char* in_b = (char*) in;
+  char* out_b = (char*) out;
+  __m128i a0, b0, a1, b1;
+  size_t ii;
+
+  for (ii=0; ii + 15 < size; ii += 16) {
+    a0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 0*16]);
+    b0 = _mm_loadu_si128((__m128i *) &in_b[2*ii + 1*16]);
+
+    a1 = _mm_unpacklo_epi8(a0, b0);
+    b1 = _mm_unpackhi_epi8(a0, b0);
+
+    a0 = _mm_unpacklo_epi8(a1, b1);
+    b0 = _mm_unpackhi_epi8(a1, b1);
+
+    a1 = _mm_unpacklo_epi8(a0, b0);
+    b1 = _mm_unpackhi_epi8(a0, b0);
+
+    a0 = _mm_unpacklo_epi8(a1, b1);
+    b0 = _mm_unpackhi_epi8(a1, b1);
+
+    _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0);
+    _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0);
+  }
+  return blosc_internal_bshuf_trans_byte_elem_remainder(in, out, size, 2,
+                                                        size - size % 16);
+}
+
+
+/* Transpose bytes within elements for 32 bit elements.
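+   (Editorial note: the 16-bit routine above is an unpack network.
+   Each _mm_unpacklo_epi8/_mm_unpackhi_epi8 pair performs a perfect
+   shuffle of the byte lanes, and four such passes leave every first
+   byte in one register and every second byte in the other, i.e. a
+   16x2 byte-matrix transpose. The 32-bit and 64-bit variants below
+   extend the same network with wider unpacks at the final stages.)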
*/ +static int64_t bshuf_trans_byte_elem_SSE_32(void* in, void* out, const size_t size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + __m128i a0, b0, c0, d0, a1, b1, c1, d1; + size_t ii; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[4*ii + 3*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + + a0 = _mm_unpacklo_epi64(a1, c1); + b0 = _mm_unpackhi_epi64(a1, c1); + c0 = _mm_unpacklo_epi64(b1, d1); + d0 = _mm_unpackhi_epi64(b1, d1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + } + return blosc_internal_bshuf_trans_byte_elem_remainder(in, out, size, 4, + size - size % 16); +} + + +/* Transpose bytes within elements for 64 bit elements. */ +static int64_t bshuf_trans_byte_elem_SSE_64(void* in, void* out, const size_t size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + size_t ii; + + for (ii=0; ii + 15 < size; ii += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 0*16]); + b0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 1*16]); + c0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 2*16]); + d0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 3*16]); + e0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 4*16]); + f0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 5*16]); + g0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 6*16]); + h0 = _mm_loadu_si128((__m128i *) &in_b[8*ii + 7*16]); + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpackhi_epi8(a0, b0); + c1 = _mm_unpacklo_epi8(c0, d0); + d1 = _mm_unpackhi_epi8(c0, d0); + e1 = _mm_unpacklo_epi8(e0, f0); + f1 = _mm_unpackhi_epi8(e0, f0); + g1 = _mm_unpacklo_epi8(g0, h0); + h1 = _mm_unpackhi_epi8(g0, h0); + + a0 = _mm_unpacklo_epi8(a1, b1); + b0 = _mm_unpackhi_epi8(a1, b1); + c0 = _mm_unpacklo_epi8(c1, d1); + d0 = _mm_unpackhi_epi8(c1, d1); + e0 = _mm_unpacklo_epi8(e1, f1); + f0 = _mm_unpackhi_epi8(e1, f1); + g0 = _mm_unpacklo_epi8(g1, h1); + h0 = _mm_unpackhi_epi8(g1, h1); + + a1 = _mm_unpacklo_epi32(a0, c0); + b1 = _mm_unpackhi_epi32(a0, c0); + c1 = _mm_unpacklo_epi32(b0, d0); + d1 = _mm_unpackhi_epi32(b0, d0); + e1 = _mm_unpacklo_epi32(e0, g0); + f1 = _mm_unpackhi_epi32(e0, g0); + g1 = _mm_unpacklo_epi32(f0, h0); + h1 = _mm_unpackhi_epi32(f0, h0); + + a0 = _mm_unpacklo_epi64(a1, e1); + b0 = _mm_unpackhi_epi64(a1, e1); + c0 = _mm_unpacklo_epi64(b1, f1); + d0 = _mm_unpackhi_epi64(b1, f1); + e0 = _mm_unpacklo_epi64(c1, g1); + f0 = _mm_unpackhi_epi64(c1, g1); + g0 = _mm_unpacklo_epi64(d1, h1); + h0 = _mm_unpackhi_epi64(d1, h1); + + _mm_storeu_si128((__m128i *) &out_b[0*size + ii], a0); + _mm_storeu_si128((__m128i *) &out_b[1*size + ii], b0); + _mm_storeu_si128((__m128i *) &out_b[2*size + ii], c0); + _mm_storeu_si128((__m128i *) &out_b[3*size + ii], d0); + _mm_storeu_si128((__m128i *) &out_b[4*size + ii], e0); 
+ _mm_storeu_si128((__m128i *) &out_b[5*size + ii], f0); + _mm_storeu_si128((__m128i *) &out_b[6*size + ii], g0); + _mm_storeu_si128((__m128i *) &out_b[7*size + ii], h0); + } + return blosc_internal_bshuf_trans_byte_elem_remainder(in, out, size, 8, + size - size % 16); +} + + +/* Memory copy with bshuf call signature. */ +static int64_t bshuf_copy(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + + memcpy(out_b, in_b, size * elem_size); + return size * elem_size; +} + + +/* Transpose bytes within elements using best SSE algorithm available. */ +int64_t blosc_internal_bshuf_trans_byte_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + /* Trivial cases: power of 2 bytes. */ + switch (elem_size) { + case 1: + count = bshuf_copy(in, out, size, elem_size); + return count; + case 2: + count = bshuf_trans_byte_elem_SSE_16(in, out, size); + return count; + case 4: + count = bshuf_trans_byte_elem_SSE_32(in, out, size); + return count; + case 8: + count = bshuf_trans_byte_elem_SSE_64(in, out, size); + return count; + } + + /* Worst case: odd number of bytes. Turns out that this is faster for */ + /* (odd * 2) byte elements as well (hence % 4). */ + if (elem_size % 4) { + count = blosc_internal_bshuf_trans_byte_elem_scal(in, out, size, elem_size); + return count; + } + + /* Multiple of power of 2: transpose hierarchically. */ + { + size_t nchunk_elem; + + if ((elem_size % 8) == 0) { + nchunk_elem = elem_size / 8; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int64_t); + count = bshuf_trans_byte_elem_SSE_64(out, tmp_buf, + size * nchunk_elem); + blosc_internal_bshuf_trans_elem(tmp_buf, out, 8, nchunk_elem, size); + } else if ((elem_size % 4) == 0) { + nchunk_elem = elem_size / 4; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int32_t); + count = bshuf_trans_byte_elem_SSE_32(out, tmp_buf, + size * nchunk_elem); + blosc_internal_bshuf_trans_elem(tmp_buf, out, 4, nchunk_elem, size); + } else { + /* Not used since scalar algorithm is faster. */ + nchunk_elem = elem_size / 2; + TRANS_ELEM_TYPE(in, out, size, nchunk_elem, int16_t); + count = bshuf_trans_byte_elem_SSE_16(out, tmp_buf, + size * nchunk_elem); + blosc_internal_bshuf_trans_elem(tmp_buf, out, 2, nchunk_elem, size); + } + + return count; + } +} + + +/* Transpose bits within bytes. */ +static int64_t bshuf_trans_bit_byte_sse2(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + uint16_t* out_ui16; + int64_t count; + size_t nbyte = elem_size * size; + __m128i xmm; + int32_t bt; + size_t ii, kk; + + CHECK_MULT_EIGHT(nbyte); + + for (ii = 0; ii + 15 < nbyte; ii += 16) { + xmm = _mm_loadu_si128((__m128i *) &in_b[ii]); + for (kk = 0; kk < 8; kk++) { + bt = _mm_movemask_epi8(xmm); + xmm = _mm_slli_epi16(xmm, 1); + out_ui16 = (uint16_t*) &out_b[((7 - kk) * nbyte + ii) / 8]; + *out_ui16 = bt; + } + } + count = blosc_internal_bshuf_trans_bit_byte_remainder(in, out, size, elem_size, + nbyte - nbyte % 16); + return count; +} + + +/* Transpose bits within elements. 
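+ * (Editorial note on the bit-plane trick in the loop above:
+ * _mm_movemask_epi8 gathers the most significant bit of each of the
+ * 16 bytes into a 16-bit mask, and _mm_slli_epi16 then shifts every
+ * byte left so the next lower bit becomes the MSB. Eight iterations
+ * thus peel off the eight bit planes of 16 input bytes, iteration kk
+ * landing in bit-row 7 - kk. The AVX2 version works the same way on
+ * 32 bytes per load.)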
*/ +int64_t blosc_internal_bshuf_trans_bit_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = blosc_internal_bshuf_trans_byte_elem_sse2(in, out, size, elem_size, tmp_buf); + CHECK_ERR(count); + count = bshuf_trans_bit_byte_sse2(out, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = blosc_internal_bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); + + return count; +} + + +/* For data organized into a row for each bit (8 * elem_size rows), transpose + * the bytes. */ +int64_t blosc_internal_bshuf_trans_byte_bitrow_sse2(void* in, void* out, const size_t size, + const size_t elem_size) { + + char* in_b = (char*) in; + char* out_b = (char*) out; + size_t nrows = 8 * elem_size; + size_t nbyte_row = size / 8; + size_t ii, jj; + + __m128i a0, b0, c0, d0, e0, f0, g0, h0; + __m128i a1, b1, c1, d1, e1, f1, g1, h1; + __m128 *as, *bs, *cs, *ds, *es, *fs, *gs, *hs; + + CHECK_MULT_EIGHT(size); + + for (ii = 0; ii + 7 < nrows; ii += 8) { + for (jj = 0; jj + 15 < nbyte_row; jj += 16) { + a0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 0)*nbyte_row + jj]); + b0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 1)*nbyte_row + jj]); + c0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 2)*nbyte_row + jj]); + d0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 3)*nbyte_row + jj]); + e0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 4)*nbyte_row + jj]); + f0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 5)*nbyte_row + jj]); + g0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 6)*nbyte_row + jj]); + h0 = _mm_loadu_si128((__m128i *) &in_b[(ii + 7)*nbyte_row + jj]); + + + a1 = _mm_unpacklo_epi8(a0, b0); + b1 = _mm_unpacklo_epi8(c0, d0); + c1 = _mm_unpacklo_epi8(e0, f0); + d1 = _mm_unpacklo_epi8(g0, h0); + e1 = _mm_unpackhi_epi8(a0, b0); + f1 = _mm_unpackhi_epi8(c0, d0); + g1 = _mm_unpackhi_epi8(e0, f0); + h1 = _mm_unpackhi_epi8(g0, h0); + + + a0 = _mm_unpacklo_epi16(a1, b1); + b0 = _mm_unpacklo_epi16(c1, d1); + c0 = _mm_unpackhi_epi16(a1, b1); + d0 = _mm_unpackhi_epi16(c1, d1); + + e0 = _mm_unpacklo_epi16(e1, f1); + f0 = _mm_unpacklo_epi16(g1, h1); + g0 = _mm_unpackhi_epi16(e1, f1); + h0 = _mm_unpackhi_epi16(g1, h1); + + + a1 = _mm_unpacklo_epi32(a0, b0); + b1 = _mm_unpackhi_epi32(a0, b0); + + c1 = _mm_unpacklo_epi32(c0, d0); + d1 = _mm_unpackhi_epi32(c0, d0); + + e1 = _mm_unpacklo_epi32(e0, f0); + f1 = _mm_unpackhi_epi32(e0, f0); + + g1 = _mm_unpacklo_epi32(g0, h0); + h1 = _mm_unpackhi_epi32(g0, h0); + + /* We don't have a storeh instruction for integers, so interpret */ + /* as a float. Have a storel (_mm_storel_epi64). 
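+ * (Editorial note: _mm_storeh_pi and _mm_storel_pi are SSE1
+ * single-precision intrinsics that store the high or low 64 bits of a
+ * register. The bits are only moved, never interpreted as floats, so
+ * viewing the integer __m128i registers through a __m128 pointer here
+ * is a standard trick to obtain the "store high half" operation that
+ * integer SSE2 lacks.)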
*/
+      as = (__m128 *) &a1;
+      bs = (__m128 *) &b1;
+      cs = (__m128 *) &c1;
+      ds = (__m128 *) &d1;
+      es = (__m128 *) &e1;
+      fs = (__m128 *) &f1;
+      gs = (__m128 *) &g1;
+      hs = (__m128 *) &h1;
+
+      _mm_storel_pi((__m64 *) &out_b[(jj + 0) * nrows + ii], *as);
+      _mm_storel_pi((__m64 *) &out_b[(jj + 2) * nrows + ii], *bs);
+      _mm_storel_pi((__m64 *) &out_b[(jj + 4) * nrows + ii], *cs);
+      _mm_storel_pi((__m64 *) &out_b[(jj + 6) * nrows + ii], *ds);
+      _mm_storel_pi((__m64 *) &out_b[(jj + 8) * nrows + ii], *es);
+      _mm_storel_pi((__m64 *) &out_b[(jj + 10) * nrows + ii], *fs);
+      _mm_storel_pi((__m64 *) &out_b[(jj + 12) * nrows + ii], *gs);
+      _mm_storel_pi((__m64 *) &out_b[(jj + 14) * nrows + ii], *hs);
+
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 1) * nrows + ii], *as);
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 3) * nrows + ii], *bs);
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 5) * nrows + ii], *cs);
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 7) * nrows + ii], *ds);
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 9) * nrows + ii], *es);
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 11) * nrows + ii], *fs);
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 13) * nrows + ii], *gs);
+      _mm_storeh_pi((__m64 *) &out_b[(jj + 15) * nrows + ii], *hs);
+    }
+    for (jj = nbyte_row - nbyte_row % 16; jj < nbyte_row; jj ++) {
+      out_b[jj * nrows + ii + 0] = in_b[(ii + 0)*nbyte_row + jj];
+      out_b[jj * nrows + ii + 1] = in_b[(ii + 1)*nbyte_row + jj];
+      out_b[jj * nrows + ii + 2] = in_b[(ii + 2)*nbyte_row + jj];
+      out_b[jj * nrows + ii + 3] = in_b[(ii + 3)*nbyte_row + jj];
+      out_b[jj * nrows + ii + 4] = in_b[(ii + 4)*nbyte_row + jj];
+      out_b[jj * nrows + ii + 5] = in_b[(ii + 5)*nbyte_row + jj];
+      out_b[jj * nrows + ii + 6] = in_b[(ii + 6)*nbyte_row + jj];
+      out_b[jj * nrows + ii + 7] = in_b[(ii + 7)*nbyte_row + jj];
+    }
+  }
+  return size * elem_size;
+}
+
+
+/* Shuffle bits within the bytes of eight element blocks. */
+int64_t blosc_internal_bshuf_shuffle_bit_eightelem_sse2(void* in, void* out, const size_t size,
+                                                        const size_t elem_size) {
+  /* With a bit of care, this could be written such that it is */
+  /* in_buf = out_buf safe. */
+  char* in_b = (char*) in;
+  uint16_t* out_ui16 = (uint16_t*) out;
+
+  size_t nbyte = elem_size * size;
+
+  __m128i xmm;
+  int32_t bt;
+  size_t ii, jj, kk;
+  size_t ind;
+
+  CHECK_MULT_EIGHT(size);
+
+  if (elem_size % 2) {
+    blosc_internal_bshuf_shuffle_bit_eightelem_scal(in, out, size, elem_size);
+  } else {
+    for (ii = 0; ii + 8 * elem_size - 1 < nbyte;
+         ii += 8 * elem_size) {
+      for (jj = 0; jj + 15 < 8 * elem_size; jj += 16) {
+        xmm = _mm_loadu_si128((__m128i *) &in_b[ii + jj]);
+        for (kk = 0; kk < 8; kk++) {
+          bt = _mm_movemask_epi8(xmm);
+          xmm = _mm_slli_epi16(xmm, 1);
+          ind = (ii + jj / 8 + (7 - kk) * elem_size);
+          out_ui16[ind / 2] = bt;
+        }
+      }
+    }
+  }
+  return size * elem_size;
+}
+
+
+/* Untranspose bits within elements.
*/ +int64_t blosc_internal_bshuf_untrans_bit_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf) { + + int64_t count; + + CHECK_MULT_EIGHT(size); + + count = blosc_internal_bshuf_trans_byte_bitrow_sse2(in, tmp_buf, size, elem_size); + CHECK_ERR(count); + count = blosc_internal_bshuf_shuffle_bit_eightelem_sse2(tmp_buf, out, size, elem_size); + + return count; +} + +#endif /* !defined(__SSE2__) */ diff --git a/c-blosc/blosc/bitshuffle-sse2.h b/c-blosc/blosc/bitshuffle-sse2.h new file mode 100644 index 0000000..ae85e08 --- /dev/null +++ b/c-blosc/blosc/bitshuffle-sse2.h @@ -0,0 +1,52 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* SSE2-accelerated shuffle/unshuffle routines. */ + +#ifndef BITSHUFFLE_SSE2_H +#define BITSHUFFLE_SSE2_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +BLOSC_NO_EXPORT int64_t +blosc_internal_bshuf_trans_byte_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf); + +BLOSC_NO_EXPORT int64_t +blosc_internal_bshuf_trans_byte_bitrow_sse2(void* in, void* out, const size_t size, + const size_t elem_size); + +BLOSC_NO_EXPORT int64_t +blosc_internal_bshuf_shuffle_bit_eightelem_sse2(void* in, void* out, const size_t size, + const size_t elem_size); + +/** + SSE2-accelerated bitshuffle routine. +*/ +BLOSC_NO_EXPORT int64_t +blosc_internal_bshuf_trans_bit_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf); + +/** + SSE2-accelerated bitunshuffle routine. +*/ +BLOSC_NO_EXPORT int64_t +blosc_internal_bshuf_untrans_bit_elem_sse2(void* in, void* out, const size_t size, + const size_t elem_size, void* tmp_buf); + +#ifdef __cplusplus +} +#endif + + +#endif /* BITSHUFFLE_SSE2_H */ diff --git a/c-blosc/blosc/blosc-common.h b/c-blosc/blosc/blosc-common.h new file mode 100644 index 0000000..de77317 --- /dev/null +++ b/c-blosc/blosc/blosc-common.h @@ -0,0 +1,77 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#ifndef SHUFFLE_COMMON_H +#define SHUFFLE_COMMON_H + +#include "blosc-export.h" +#include + +#ifdef __GNUC__ +#define BLOSC_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#endif // __GNUC__ + +/* Import standard integer type definitions */ +#if defined(_WIN32) && !defined(__MINGW32__) + /* stdint.h only available in VS2010 (VC++ 16.0) and newer */ + #if defined(_MSC_VER) && _MSC_VER < 1600 + #include "win32/stdint-windows.h" + #else + #include + #endif +#else + #include +#endif /* _WIN32 */ + + +/* Define the __SSE2__ symbol if compiling with Visual C++ and + targeting the minimum architecture level supporting SSE2. + Other compilers define this as expected and emit warnings + when it is re-defined. */ +#if !defined(__SSE2__) && defined(_MSC_VER) && \ + (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2)) + #define __SSE2__ +#endif + +/* + * Detect if the architecture is fine with unaligned access. 
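+ * (Editorial note: the logic below is deny-by-default: define
+ * BLOSC_STRICT_ALIGN first, then undefine it only on architectures
+ * known to handle unaligned loads well. On strict-alignment targets
+ * the portable way to do an unaligned read is a small memcpy, e.g.
+ *   uint32_t v; memcpy(&v, p, 4);
+ * which compilers lower to a single load wherever the hardware
+ * allows it.)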
+ */ +#if !defined(BLOSC_STRICT_ALIGN) +#define BLOSC_STRICT_ALIGN +#if defined(__i386__) || defined(__386) || defined (__amd64) /* GNU C, Sun Studio */ +#undef BLOSC_STRICT_ALIGN +#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */ +#undef BLOSC_STRICT_ALIGN +#elif defined(_M_IX86) || defined(_M_X64) /* Intel, MSVC */ +#undef BLOSC_STRICT_ALIGN +#elif defined(__386) +#undef BLOSC_STRICT_ALIGN +#elif defined(_X86_) /* MinGW */ +#undef BLOSC_STRICT_ALIGN +#elif defined(__I86__) /* Digital Mars */ +#undef BLOSC_STRICT_ALIGN +/* Seems like unaligned access in ARM (at least ARMv6) is pretty + expensive, so we are going to always enforce strict alignment in ARM. + If anybody suggest that newer ARMs are better, we can revisit this. */ +/* #elif defined(__ARM_FEATURE_UNALIGNED) */ /* ARM, GNU C */ +/* #undef BLOSC_STRICT_ALIGN */ +#elif defined(_ARCH_PPC) || defined(__PPC__) +/* Modern PowerPC systems (like POWER8) should support unaligned access + quite efficiently. */ +#undef BLOSC_STRICT_ALIGN +#endif +#endif + +#if defined(__SSE2__) + #include +#endif +#if defined(__AVX2__) + #include +#endif + +#endif /* SHUFFLE_COMMON_H */ diff --git a/c-blosc/blosc/blosc-comp-features.h b/c-blosc/blosc/blosc-comp-features.h new file mode 100644 index 0000000..01fb158 --- /dev/null +++ b/c-blosc/blosc/blosc-comp-features.h @@ -0,0 +1,21 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#ifndef BLOSC_COMP_FEATURES_H +#define BLOSC_COMP_FEATURES_H + +/* Use inlined functions for supported systems */ +#if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */ + #define BLOSC_INLINE __inline /* Visual C is not C99, but supports some kind of inline */ +#elif __STDC_VERSION__ >= 199901L + #define BLOSC_INLINE inline +#else + #define BLOSC_INLINE +#endif + +#endif /* BLOSC_COMP_FEATURES_H */ diff --git a/c-blosc/blosc/blosc-export.h b/c-blosc/blosc/blosc-export.h new file mode 100644 index 0000000..60aab05 --- /dev/null +++ b/c-blosc/blosc/blosc-export.h @@ -0,0 +1,45 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ +#ifndef BLOSC_EXPORT_H +#define BLOSC_EXPORT_H + +/* Macros for specifying exported symbols. + BLOSC_EXPORT is used to decorate symbols that should be + exported by the blosc shared library. + BLOSC_NO_EXPORT is used to decorate symbols that should NOT + be exported by the blosc shared library. +*/ +#if defined(BLOSC_SHARED_LIBRARY) + #if defined(_MSC_VER) + #define BLOSC_EXPORT __declspec(dllexport) + #elif (defined(__GNUC__) && __GNUC__ >= 4) || defined(__clang__) + #if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) + #define BLOSC_EXPORT __attribute__((dllexport)) + #else + #define BLOSC_EXPORT __attribute__((visibility("default"))) + #endif /* defined(_WIN32) || defined(__CYGWIN__) */ + #else + #error Cannot determine how to define BLOSC_EXPORT for this compiler. 
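+/* (Editorial note: with BLOSC_SHARED_LIBRARY defined, BLOSC_EXPORT
+   resolves to __declspec(dllexport) under MSVC and to
+   __attribute__((visibility("default"))) under GCC/Clang on ELF
+   platforms; static builds fall through to the empty definition
+   below.) */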
+ #endif +#else + #define BLOSC_EXPORT +#endif /* defined(BLOSC_SHARED_LIBRARY) */ + +#if defined(__GNUC__) || defined(__clang__) + #define BLOSC_NO_EXPORT __attribute__((visibility("hidden"))) +#else + #define BLOSC_NO_EXPORT +#endif /* defined(__GNUC__) || defined(__clang__) */ + +/* When testing, export everything to make it easier to implement tests. */ +#if defined(BLOSC_TESTING) + #undef BLOSC_NO_EXPORT + #define BLOSC_NO_EXPORT BLOSC_EXPORT +#endif /* defined(BLOSC_TESTING) */ + +#endif /* BLOSC_EXPORT_H */ diff --git a/c-blosc/blosc/blosc.c b/c-blosc/blosc/blosc.c new file mode 100644 index 0000000..a3d137d --- /dev/null +++ b/c-blosc/blosc/blosc.c @@ -0,0 +1,2307 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + Creation date: 2009-05-20 + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + + +#include +#include +#include +#include +#include +#include + +#include "fastcopy.h" + +#if defined(USING_CMAKE) + #include "config.h" +#endif /* USING_CMAKE */ +#include "blosc.h" +#include "shuffle.h" +#include "blosclz.h" +#if defined(HAVE_LZ4) + #include "lz4.h" + #include "lz4hc.h" +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + #include "snappy-c.h" +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + #include "zlib.h" +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + #include "zstd.h" +#endif /* HAVE_ZSTD */ + +#if defined(_WIN32) && !defined(__MINGW32__) + #include + #include + + /* stdint.h only available in VS2010 (VC++ 16.0) and newer */ + #if defined(_MSC_VER) && _MSC_VER < 1600 + #include "win32/stdint-windows.h" + #else + #include + #endif + + #include + #define getpid _getpid +#else + #include + #include + #include +#endif /* _WIN32 */ + +/* Include the win32/pthread.h library for all the Windows builds. See #224. */ +#if defined(_WIN32) + #include "win32/pthread.h" + #include "win32/pthread.c" +#else + #include +#endif + + +/* Some useful units */ +#define KB 1024 +#define MB (1024 * (KB)) + +/* Minimum buffer size to be compressed */ +#define MIN_BUFFERSIZE 128 /* Cannot be smaller than 66 */ + +/* The maximum number of splits in a block for compression */ +#define MAX_SPLITS 16 /* Cannot be larger than 128 */ + +/* The size of L1 cache. 32 KB is quite common nowadays. 
*/ +#define L1 (32 * (KB)) + +/* Have problems using posix barriers when symbol value is 200112L */ +/* This requires more investigation, but will work for the moment */ +#if defined(_POSIX_BARRIERS) && ( (_POSIX_BARRIERS - 20012L) >= 0 && _POSIX_BARRIERS != 200112L) +#define _POSIX_BARRIERS_MINE +#endif +/* Synchronization variables */ + + +struct blosc_context { + int32_t compress; /* 1 if we are doing compression 0 if decompress */ + + const uint8_t* src; + uint8_t* dest; /* The current pos in the destination buffer */ + uint8_t* header_flags; /* Flags for header */ + int compversion; /* Compressor version byte, only used during decompression */ + int32_t sourcesize; /* Number of bytes in source buffer (or uncompressed bytes in compressed file) */ + int32_t compressedsize; /* Number of bytes of compressed data (only used when decompressing) */ + int32_t nblocks; /* Number of total blocks in buffer */ + int32_t leftover; /* Extra bytes at end of buffer */ + int32_t blocksize; /* Length of the block in bytes */ + int32_t typesize; /* Type size */ + int32_t num_output_bytes; /* Counter for the number of output bytes */ + int32_t destsize; /* Maximum size for destination buffer */ + uint8_t* bstarts; /* Start of the buffer past header info */ + int32_t compcode; /* Compressor code to use */ + int clevel; /* Compression level (1-9) */ + /* Function to use for decompression. Only used when decompression */ + int (*decompress_func)(const void* input, int compressed_length, void* output, + int maxout); + + /* Threading */ + int32_t numthreads; + int32_t threads_started; + int32_t end_threads; + pthread_t threads[BLOSC_MAX_THREADS]; + int32_t tids[BLOSC_MAX_THREADS]; + pthread_mutex_t count_mutex; + #ifdef _POSIX_BARRIERS_MINE + pthread_barrier_t barr_init; + pthread_barrier_t barr_finish; + #else + int32_t count_threads; + pthread_mutex_t count_threads_mutex; + pthread_cond_t count_threads_cv; + #endif + #if !defined(_WIN32) + pthread_attr_t ct_attr; /* creation time attrs for threads */ + #endif + int32_t thread_giveup_code; /* error code when give up */ + int32_t thread_nblock; /* block counter */ +}; + +struct thread_context { + struct blosc_context* parent_context; + int32_t tid; + uint8_t* tmp; + uint8_t* tmp2; + uint8_t* tmp3; + int32_t tmpblocksize; /* Used to keep track of how big the temporary buffers are */ +}; + +/* Global context for non-contextual API */ +static struct blosc_context* g_global_context; +static pthread_mutex_t* global_comp_mutex; +static int32_t g_compressor = BLOSC_BLOSCLZ; /* the compressor to use by default */ +static int32_t g_threads = 1; +static int32_t g_force_blocksize = 0; +static int32_t g_initlib = 0; +static int32_t g_atfork_registered = 0; +static int32_t g_splitmode = BLOSC_FORWARD_COMPAT_SPLIT; + + + +/* Wrapped function to adjust the number of threads used by blosc */ +int blosc_set_nthreads_(struct blosc_context*); + +/* Releases the global threadpool */ +int blosc_release_threadpool(struct blosc_context* context); + +/* Macros for synchronization */ + +/* Wait until all threads are initialized */ +#ifdef _POSIX_BARRIERS_MINE +#define WAIT_INIT(RET_VAL, CONTEXT_PTR) \ + rc = pthread_barrier_wait(&CONTEXT_PTR->barr_init); \ + if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { \ + printf("Could not wait on barrier (init): %d\n", rc); \ + return((RET_VAL)); \ + } +#else +#define WAIT_INIT(RET_VAL, CONTEXT_PTR) \ + pthread_mutex_lock(&CONTEXT_PTR->count_threads_mutex); \ + if (CONTEXT_PTR->count_threads < CONTEXT_PTR->numthreads) { \ + 
CONTEXT_PTR->count_threads++; \ + pthread_cond_wait(&CONTEXT_PTR->count_threads_cv, &CONTEXT_PTR->count_threads_mutex); \ + } \ + else { \ + pthread_cond_broadcast(&CONTEXT_PTR->count_threads_cv); \ + } \ + pthread_mutex_unlock(&CONTEXT_PTR->count_threads_mutex); +#endif + +/* Wait for all threads to finish */ +#ifdef _POSIX_BARRIERS_MINE +#define WAIT_FINISH(RET_VAL, CONTEXT_PTR) \ + rc = pthread_barrier_wait(&CONTEXT_PTR->barr_finish); \ + if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { \ + printf("Could not wait on barrier (finish)\n"); \ + return((RET_VAL)); \ + } +#else +#define WAIT_FINISH(RET_VAL, CONTEXT_PTR) \ + pthread_mutex_lock(&CONTEXT_PTR->count_threads_mutex); \ + if (CONTEXT_PTR->count_threads > 0) { \ + CONTEXT_PTR->count_threads--; \ + pthread_cond_wait(&CONTEXT_PTR->count_threads_cv, &CONTEXT_PTR->count_threads_mutex); \ + } \ + else { \ + pthread_cond_broadcast(&CONTEXT_PTR->count_threads_cv); \ + } \ + pthread_mutex_unlock(&CONTEXT_PTR->count_threads_mutex); +#endif + + +/* A function for aligned malloc that is portable */ +static uint8_t *my_malloc(size_t size) +{ + void *block = NULL; + int res = 0; + +/* Do an alignment to 32 bytes because AVX2 is supported */ +#if defined(_WIN32) + /* A (void *) cast needed for avoiding a warning with MINGW :-/ */ + block = (void *)_aligned_malloc(size, 32); +#elif _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 + /* Platform does have an implementation of posix_memalign */ + res = posix_memalign(&block, 32, size); +#else + block = malloc(size); +#endif /* _WIN32 */ + + if (block == NULL || res != 0) { + printf("Error allocating memory!"); + return NULL; + } + + return (uint8_t *)block; +} + + +/* Release memory booked by my_malloc */ +static void my_free(void *block) +{ +#if defined(_WIN32) + _aligned_free(block); +#else + free(block); +#endif /* _WIN32 */ +} + + +/* Copy 4 bytes from `*pa` to int32_t, changing endianness if necessary. */ +static int32_t sw32_(const uint8_t *pa) +{ + int32_t idest; + uint8_t *dest = (uint8_t *)&idest; + int i = 1; /* for big/little endian detection */ + char *p = (char *)&i; + + if (p[0] != 1) { + /* big endian */ + dest[0] = pa[3]; + dest[1] = pa[2]; + dest[2] = pa[1]; + dest[3] = pa[0]; + } + else { + /* little endian */ + dest[0] = pa[0]; + dest[1] = pa[1]; + dest[2] = pa[2]; + dest[3] = pa[3]; + } + return idest; +} + + +/* Copy 4 bytes from `*pa` to `*dest`, changing endianness if necessary. 
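+   (Editorial sketch: sw32_ above and _sw32 below form a round trip
+   that always serializes int32 values little-endian, e.g.
+     uint8_t buf[4];
+     _sw32(buf, 1);           on any host, buf is now {1, 0, 0, 0}
+     int32_t v = sw32_(buf);  v == 1 again
+   the runtime probe writes 1 into an int and inspects its first byte
+   to decide whether the host itself is little-endian.)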
*/ +static void _sw32(uint8_t* dest, int32_t a) +{ + uint8_t *pa = (uint8_t *)&a; + int i = 1; /* for big/little endian detection */ + char *p = (char *)&i; + + if (p[0] != 1) { + /* big endian */ + dest[0] = pa[3]; + dest[1] = pa[2]; + dest[2] = pa[1]; + dest[3] = pa[0]; + } + else { + /* little endian */ + dest[0] = pa[0]; + dest[1] = pa[1]; + dest[2] = pa[2]; + dest[3] = pa[3]; + } +} + +/* + * Conversion routines between compressor and compression libraries + */ + +/* Return the library code associated with the compressor name */ +static int compname_to_clibcode(const char *compname) +{ + if (strcmp(compname, BLOSC_BLOSCLZ_COMPNAME) == 0) + return BLOSC_BLOSCLZ_LIB; + if (strcmp(compname, BLOSC_LZ4_COMPNAME) == 0) + return BLOSC_LZ4_LIB; + if (strcmp(compname, BLOSC_LZ4HC_COMPNAME) == 0) + return BLOSC_LZ4_LIB; + if (strcmp(compname, BLOSC_SNAPPY_COMPNAME) == 0) + return BLOSC_SNAPPY_LIB; + if (strcmp(compname, BLOSC_ZLIB_COMPNAME) == 0) + return BLOSC_ZLIB_LIB; + if (strcmp(compname, BLOSC_ZSTD_COMPNAME) == 0) + return BLOSC_ZSTD_LIB; + return -1; +} + +/* Return the library name associated with the compressor code */ +static const char *clibcode_to_clibname(int clibcode) +{ + if (clibcode == BLOSC_BLOSCLZ_LIB) return BLOSC_BLOSCLZ_LIBNAME; + if (clibcode == BLOSC_LZ4_LIB) return BLOSC_LZ4_LIBNAME; + if (clibcode == BLOSC_SNAPPY_LIB) return BLOSC_SNAPPY_LIBNAME; + if (clibcode == BLOSC_ZLIB_LIB) return BLOSC_ZLIB_LIBNAME; + if (clibcode == BLOSC_ZSTD_LIB) return BLOSC_ZSTD_LIBNAME; + return NULL; /* should never happen */ +} + + +/* + * Conversion routines between compressor names and compressor codes + */ + +/* Get the compressor name associated with the compressor code */ +int blosc_compcode_to_compname(int compcode, const char **compname) +{ + int code = -1; /* -1 means non-existent compressor code */ + const char *name = NULL; + + /* Map the compressor code */ + if (compcode == BLOSC_BLOSCLZ) + name = BLOSC_BLOSCLZ_COMPNAME; + else if (compcode == BLOSC_LZ4) + name = BLOSC_LZ4_COMPNAME; + else if (compcode == BLOSC_LZ4HC) + name = BLOSC_LZ4HC_COMPNAME; + else if (compcode == BLOSC_SNAPPY) + name = BLOSC_SNAPPY_COMPNAME; + else if (compcode == BLOSC_ZLIB) + name = BLOSC_ZLIB_COMPNAME; + else if (compcode == BLOSC_ZSTD) + name = BLOSC_ZSTD_COMPNAME; + + *compname = name; + + /* Guess if there is support for this code */ + if (compcode == BLOSC_BLOSCLZ) + code = BLOSC_BLOSCLZ; +#if defined(HAVE_LZ4) + else if (compcode == BLOSC_LZ4) + code = BLOSC_LZ4; + else if (compcode == BLOSC_LZ4HC) + code = BLOSC_LZ4HC; +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + else if (compcode == BLOSC_SNAPPY) + code = BLOSC_SNAPPY; +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + else if (compcode == BLOSC_ZLIB) + code = BLOSC_ZLIB; +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + else if (compcode == BLOSC_ZSTD) + code = BLOSC_ZSTD; +#endif /* HAVE_ZSTD */ + + return code; +} + +/* Get the compressor code for the compressor name. 
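+   (Editorial example: blosc_compname_to_compcode("lz4") yields
+   BLOSC_LZ4 in builds compiled with HAVE_LZ4, while a name whose
+   support was compiled out falls through every conditional branch.)
+   Returns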
-1 if it is not available */ +int blosc_compname_to_compcode(const char *compname) +{ + int code = -1; /* -1 means non-existent compressor code */ + + if (strcmp(compname, BLOSC_BLOSCLZ_COMPNAME) == 0) { + code = BLOSC_BLOSCLZ; + } +#if defined(HAVE_LZ4) + else if (strcmp(compname, BLOSC_LZ4_COMPNAME) == 0) { + code = BLOSC_LZ4; + } + else if (strcmp(compname, BLOSC_LZ4HC_COMPNAME) == 0) { + code = BLOSC_LZ4HC; + } +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + else if (strcmp(compname, BLOSC_SNAPPY_COMPNAME) == 0) { + code = BLOSC_SNAPPY; + } +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + else if (strcmp(compname, BLOSC_ZLIB_COMPNAME) == 0) { + code = BLOSC_ZLIB; + } +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + else if (strcmp(compname, BLOSC_ZSTD_COMPNAME) == 0) { + code = BLOSC_ZSTD; + } +#endif /* HAVE_ZSTD */ + +return code; +} + + +#if defined(HAVE_LZ4) +static int lz4_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int accel) +{ + int cbytes; + cbytes = LZ4_compress_fast(input, output, (int)input_length, (int)maxout, + accel); + return cbytes; +} + +static int lz4hc_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int clevel) +{ + int cbytes; + if (input_length > (size_t)(UINT32_C(2)<<30)) + return -1; /* input larger than 2 GB is not supported */ + /* clevel for lz4hc goes up to 12, at least in LZ4 1.7.5 + * but levels larger than 9 do not buy much compression. */ + cbytes = LZ4_compress_HC(input, output, (int)input_length, (int)maxout, + clevel); + return cbytes; +} + +static int lz4_wrap_decompress(const void* input, int compressed_length, + void* output, int maxout) +{ + return LZ4_decompress_safe(input, output, compressed_length, maxout); +} + +#endif /* HAVE_LZ4 */ + +#if defined(HAVE_SNAPPY) +static int snappy_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout) +{ + snappy_status status; + size_t cl = maxout; + status = snappy_compress(input, input_length, output, &cl); + if (status != SNAPPY_OK){ + return 0; + } + return (int)cl; +} + +static int snappy_wrap_decompress(const void* input, int compressed_length, + void* output, int maxout) +{ + snappy_status status; + size_t ul = maxout; + status = snappy_uncompress(input, compressed_length, output, &ul); + if (status != SNAPPY_OK){ + return 0; + } + return (int)ul; +} +#endif /* HAVE_SNAPPY */ + +#if defined(HAVE_ZLIB) +/* zlib is not very respectful with sharing name space with others. + Fortunately, its names do not collide with those already in blosc. */ +static int zlib_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int clevel) +{ + int status; + uLongf cl = maxout; + status = compress2( + (Bytef*)output, &cl, (Bytef*)input, (uLong)input_length, clevel); + if (status != Z_OK){ + return 0; + } + return (int)cl; +} + +static int zlib_wrap_decompress(const void* input, int compressed_length, + void* output, int maxout) { + int status; + uLongf ul = maxout; + status = uncompress( + (Bytef*)output, &ul, (Bytef*)input, (uLong)compressed_length); + if (status != Z_OK){ + return 0; + } + return (int)ul; +} +#endif /* HAVE_ZLIB */ + +#if defined(HAVE_ZSTD) +static int zstd_wrap_compress(const char* input, size_t input_length, + char* output, size_t maxout, int clevel) { + size_t code; + clevel = (clevel < 9) ? 
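+    /* (Editorial note: this maps Blosc levels 1..8 to zstd levels
+       1,3,5,7,9,11,13,15 and level 9 to ZSTD_maxCLevel(). The
+       "clevel == 8" adjustment below tests the already remapped
+       value, which is always odd or ZSTD_maxCLevel(), so it appears
+       never to fire.) */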
clevel * 2 - 1 : ZSTD_maxCLevel(); + /* Make the level 8 close enough to maxCLevel */ + if (clevel == 8) clevel = ZSTD_maxCLevel() - 2; + code = ZSTD_compress( + (void*)output, maxout, (void*)input, input_length, clevel); + if (ZSTD_isError(code)) { + return 0; + } + return (int)code; +} + +static int zstd_wrap_decompress(const void* input, int compressed_length, + void* output, int maxout) { + size_t code; + code = ZSTD_decompress( + (void*)output, maxout, (void*)input, compressed_length); + if (ZSTD_isError(code)) { + return 0; + } + return (int)code; +} +#endif /* HAVE_ZSTD */ + +static int initialize_decompress_func(struct blosc_context* context) { + int8_t header_flags = *(context->header_flags); + int32_t compformat = (header_flags & 0xe0) >> 5; + int compversion = context->compversion; + + if (compformat == BLOSC_BLOSCLZ_FORMAT) { + if (compversion != BLOSC_BLOSCLZ_VERSION_FORMAT) { + return -9; + } + context->decompress_func = &blosclz_decompress; + return 0; + } +#if defined(HAVE_LZ4) + if (compformat == BLOSC_LZ4_FORMAT) { + if (compversion != BLOSC_LZ4_VERSION_FORMAT) { + return -9; + } + context->decompress_func = &lz4_wrap_decompress; + return 0; + } +#endif /* HAVE_LZ4 */ +#if defined(HAVE_SNAPPY) + if (compformat == BLOSC_SNAPPY_FORMAT) { + if (compversion != BLOSC_SNAPPY_VERSION_FORMAT) { + return -9; + } + context->decompress_func = &snappy_wrap_decompress; + return 0; + } +#endif /* HAVE_SNAPPY */ +#if defined(HAVE_ZLIB) + if (compformat == BLOSC_ZLIB_FORMAT) { + if (compversion != BLOSC_ZLIB_VERSION_FORMAT) { + return -9; + } + context->decompress_func = &zlib_wrap_decompress; + return 0; + } +#endif /* HAVE_ZLIB */ +#if defined(HAVE_ZSTD) + if (compformat == BLOSC_ZSTD_FORMAT) { + if (compversion != BLOSC_ZSTD_VERSION_FORMAT) { + return -9; + } + context->decompress_func = &zstd_wrap_decompress; + return 0; + } +#endif /* HAVE_ZSTD */ + return -5; /* signals no decompression support */ +} + +/* Compute acceleration for blosclz */ +static int get_accel(const struct blosc_context* context) { + int32_t clevel = context->clevel; + + if (context->compcode == BLOSC_LZ4) { + /* This acceleration setting based on discussions held in: + * https://groups.google.com/forum/#!topic/lz4c/zosy90P8MQw + */ + return (10 - clevel); + } + return 1; +} + + +/* Shuffle & compress a single block */ +static int blosc_c(const struct blosc_context* context, int32_t blocksize, + int32_t leftoverblock, int32_t ntbytes, int32_t maxbytes, + const uint8_t *src, uint8_t *dest, uint8_t *tmp, + uint8_t *tmp2) +{ + int8_t header_flags = *(context->header_flags); + int dont_split = (header_flags & 0x10) >> 4; + int32_t j, neblock, nsplits; + int32_t cbytes; /* number of compressed bytes in split */ + int32_t ctbytes = 0; /* number of compressed bytes in block */ + int32_t maxout; + int32_t typesize = context->typesize; + const uint8_t *_tmp = src; + const char *compname; + int accel; + int bscount; + int doshuffle = (header_flags & BLOSC_DOSHUFFLE) && (typesize > 1); + int dobitshuffle = ((header_flags & BLOSC_DOBITSHUFFLE) && + (blocksize >= typesize)); + + if (doshuffle) { + /* Byte shuffling only makes sense if typesize > 1 */ + blosc_internal_shuffle(typesize, blocksize, src, tmp); + _tmp = tmp; + } + /* We don't allow more than 1 filter at the same time (yet) */ + else if (dobitshuffle) { + bscount = blosc_internal_bitshuffle(typesize, blocksize, src, tmp, tmp2); + if (bscount < 0) + return bscount; + _tmp = tmp; + } + + /* Calculate acceleration for different compressors */ + accel = 
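+  /* (Editorial note: despite the "for blosclz" wording on get_accel's
+     own comment, it only returns a non-default value for LZ4, where
+     acceleration is 10 - clevel: clevel 1 gives accel 9, fastest and
+     least compressing, while clevel 9 gives accel 1.) */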
get_accel(context); + + /* The number of splits for this block */ + if (!dont_split && !leftoverblock) { + nsplits = typesize; + } + else { + nsplits = 1; + } + neblock = blocksize / nsplits; + for (j = 0; j < nsplits; j++) { + dest += sizeof(int32_t); + ntbytes += (int32_t)sizeof(int32_t); + ctbytes += (int32_t)sizeof(int32_t); + maxout = neblock; + #if defined(HAVE_SNAPPY) + if (context->compcode == BLOSC_SNAPPY) { + /* TODO perhaps refactor this to keep the value stashed somewhere */ + maxout = snappy_max_compressed_length(neblock); + } + #endif /* HAVE_SNAPPY */ + if (ntbytes+maxout > maxbytes) { + maxout = maxbytes - ntbytes; /* avoid buffer overrun */ + if (maxout <= 0) { + return 0; /* non-compressible block */ + } + } + if (context->compcode == BLOSC_BLOSCLZ) { + cbytes = blosclz_compress(context->clevel, _tmp+j*neblock, neblock, + dest, maxout, !dont_split); + } + #if defined(HAVE_LZ4) + else if (context->compcode == BLOSC_LZ4) { + cbytes = lz4_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout, accel); + } + else if (context->compcode == BLOSC_LZ4HC) { + cbytes = lz4hc_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout, + context->clevel); + } + #endif /* HAVE_LZ4 */ + #if defined(HAVE_SNAPPY) + else if (context->compcode == BLOSC_SNAPPY) { + cbytes = snappy_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout); + } + #endif /* HAVE_SNAPPY */ + #if defined(HAVE_ZLIB) + else if (context->compcode == BLOSC_ZLIB) { + cbytes = zlib_wrap_compress((char *)_tmp+j*neblock, (size_t)neblock, + (char *)dest, (size_t)maxout, + context->clevel); + } + #endif /* HAVE_ZLIB */ + #if defined(HAVE_ZSTD) + else if (context->compcode == BLOSC_ZSTD) { + cbytes = zstd_wrap_compress((char*)_tmp + j * neblock, (size_t)neblock, + (char*)dest, (size_t)maxout, context->clevel); + } + #endif /* HAVE_ZSTD */ + + else { + blosc_compcode_to_compname(context->compcode, &compname); + if (compname == NULL) { + compname = "(null)"; + } + fprintf(stderr, "Blosc has not been compiled with '%s' ", compname); + fprintf(stderr, "compression support. Please use one having it."); + return -5; /* signals no compression support */ + } + + if (cbytes > maxout) { + /* Buffer overrun caused by compression (should never happen) */ + return -1; + } + else if (cbytes < 0) { + /* cbytes should never be negative */ + return -2; + } + else if (cbytes == 0 || cbytes == neblock) { + /* The compressor has been unable to compress data at all. */ + /* Before doing the copy, check that we are not running into a + buffer overflow. 
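+     (Editorial note on the layout being produced: each of the nsplits
+     sub-streams is written as a little-endian int32 length followed
+     by its payload. An incompressible split is stored verbatim with
+     length == neblock, which is also the signal blosc_d uses to take
+     the plain-copy path when decompressing.)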
*/ + if ((ntbytes+neblock) > maxbytes) { + return 0; /* Non-compressible data */ + } + fastcopy(dest, _tmp + j * neblock, neblock); + cbytes = neblock; + } + _sw32(dest - 4, cbytes); + dest += cbytes; + ntbytes += cbytes; + ctbytes += cbytes; + } /* Closes j < nsplits */ + + return ctbytes; +} + +/* Decompress & unshuffle a single block */ +static int blosc_d(struct blosc_context* context, int32_t blocksize, + int32_t leftoverblock, const uint8_t* base_src, + int32_t src_offset, uint8_t* dest, uint8_t* tmp, + uint8_t* tmp2) { + int8_t header_flags = *(context->header_flags); + int dont_split = (header_flags & 0x10) >> 4; + int32_t j, neblock, nsplits; + int32_t nbytes; /* number of decompressed bytes in split */ + const int32_t compressedsize = context->compressedsize; + int32_t cbytes; /* number of compressed bytes in split */ + int32_t ctbytes = 0; /* number of compressed bytes in block */ + int32_t ntbytes = 0; /* number of uncompressed bytes in block */ + uint8_t *_tmp = dest; + int32_t typesize = context->typesize; + int bscount; + int doshuffle = (header_flags & BLOSC_DOSHUFFLE) && (typesize > 1); + int dobitshuffle = ((header_flags & BLOSC_DOBITSHUFFLE) && + (blocksize >= typesize)); + const uint8_t* src; + + if (doshuffle || dobitshuffle) { + _tmp = tmp; + } + + /* The number of splits for this block */ + if (!dont_split && + /* For compatibility with before the introduction of the split flag */ + ((typesize <= MAX_SPLITS) && (blocksize/typesize) >= MIN_BUFFERSIZE) && + !leftoverblock) { + nsplits = typesize; + } + else { + nsplits = 1; + } + + neblock = blocksize / nsplits; + for (j = 0; j < nsplits; j++) { + /* Validate src_offset */ + if (src_offset < 0 || src_offset > compressedsize - sizeof(int32_t)) { + return -1; + } + cbytes = sw32_(base_src + src_offset); /* amount of compressed bytes */ + src_offset += sizeof(int32_t); + /* Validate cbytes */ + if (cbytes < 0 || cbytes > context->compressedsize - src_offset) { + return -1; + } + ctbytes += (int32_t)sizeof(int32_t); + src = base_src + src_offset; + /* Uncompress */ + if (cbytes == neblock) { + fastcopy(_tmp, src, neblock); + nbytes = neblock; + } + else { + nbytes = context->decompress_func(src, cbytes, _tmp, neblock); + /* Check that decompressed bytes number is correct */ + if (nbytes != neblock) { + return -2; + } + } + src_offset += cbytes; + ctbytes += cbytes; + _tmp += nbytes; + ntbytes += nbytes; + } /* Closes j < nsplits */ + + if (doshuffle) { + blosc_internal_unshuffle(typesize, blocksize, tmp, dest); + } + else if (dobitshuffle) { + bscount = blosc_internal_bitunshuffle(typesize, blocksize, tmp, dest, tmp2); + if (bscount < 0) + return bscount; + } + + /* Return the number of uncompressed bytes */ + return ntbytes; +} + +/* Serial version for compression/decompression */ +static int serial_blosc(struct blosc_context* context) +{ + int32_t j, bsize, leftoverblock; + int32_t cbytes; + + int32_t ebsize = context->blocksize + context->typesize * (int32_t)sizeof(int32_t); + int32_t ntbytes = context->num_output_bytes; + + uint8_t *tmp = my_malloc(context->blocksize + ebsize); + uint8_t *tmp2 = tmp + context->blocksize; + + for (j = 0; j < context->nblocks; j++) { + if (context->compress && !(*(context->header_flags) & BLOSC_MEMCPYED)) { + _sw32(context->bstarts + j * 4, ntbytes); + } + bsize = context->blocksize; + leftoverblock = 0; + if ((j == context->nblocks - 1) && (context->leftover > 0)) { + bsize = context->leftover; + leftoverblock = 1; + } + if (context->compress) { + if (*(context->header_flags) & 
BLOSC_MEMCPYED) {
+        /* We want to memcpy only */
+        fastcopy(context->dest + BLOSC_MAX_OVERHEAD + j * context->blocksize,
+                 context->src + j * context->blocksize, bsize);
+        cbytes = bsize;
+      }
+      else {
+        /* Regular compression */
+        cbytes = blosc_c(context, bsize, leftoverblock, ntbytes,
+                         context->destsize, context->src+j*context->blocksize,
+                         context->dest+ntbytes, tmp, tmp2);
+        if (cbytes == 0) {
+          ntbytes = 0;              /* incompressible data */
+          break;
+        }
+      }
+    }
+    else {
+      if (*(context->header_flags) & BLOSC_MEMCPYED) {
+        /* We want to memcpy only */
+        fastcopy(context->dest + j * context->blocksize,
+                 context->src + BLOSC_MAX_OVERHEAD + j * context->blocksize, bsize);
+        cbytes = bsize;
+      }
+      else {
+        /* Regular decompression */
+        cbytes = blosc_d(context, bsize, leftoverblock, context->src,
+                         sw32_(context->bstarts + j * 4),
+                         context->dest + j * context->blocksize, tmp, tmp2);
+      }
+    }
+    if (cbytes < 0) {
+      ntbytes = cbytes;         /* error in blosc_c or blosc_d */
+      break;
+    }
+    ntbytes += cbytes;
+  }
+
+  /* Free temporaries */
+  my_free(tmp);
+
+  return ntbytes;
+}
+
+
+/* Threaded version for compression/decompression */
+static int parallel_blosc(struct blosc_context* context)
+{
+  int rc;
+  (void)rc;  // just to avoid 'unused-variable' warning
+
+  /* Check whether we need to restart threads */
+  if (blosc_set_nthreads_(context) < 0) {
+    return -1;
+  }
+
+  /* Set sentinels */
+  context->thread_giveup_code = 1;
+  context->thread_nblock = -1;
+
+  /* Synchronization point for all threads (wait for initialization) */
+  WAIT_INIT(-1, context);
+
+  /* Synchronization point for all threads (wait for finalization) */
+  WAIT_FINISH(-1, context);
+
+  if (context->thread_giveup_code > 0) {
+    /* Return the total bytes (de-)compressed in threads */
+    return context->num_output_bytes;
+  }
+  else {
+    /* Compression/decompression gave up. Return error code. */
+    return context->thread_giveup_code;
+  }
+}
+
+
+/* Do the compression or decompression of the buffer depending on the
+   global params. */
+static int do_job(struct blosc_context* context)
+{
+  int32_t ntbytes;
+
+  /* Run the serial version when nthreads is 1 or when the buffers are
+     not much larger than blocksize */
+  if (context->numthreads == 1 || (context->sourcesize / context->blocksize) <= 1) {
+    ntbytes = serial_blosc(context);
+  }
+  else {
+    ntbytes = parallel_blosc(context);
+  }
+
+  return ntbytes;
+}
+
+
+/* Whether a codec is meant for High Compression Ratios */
+#define HCR(codec) ( \
+  ((codec) == BLOSC_LZ4HC) || \
+  ((codec) == BLOSC_ZLIB) || \
+  ((codec) == BLOSC_ZSTD) ? 1 : 0 )
+
+
+/* Conditions for splitting a block before compressing with a codec. */
+static int split_block(int compcode, int typesize, int blocksize) {
+  int splitblock = -1;
+
+  switch (g_splitmode) {
+    case BLOSC_ALWAYS_SPLIT:
+      splitblock = 1;
+      break;
+    case BLOSC_NEVER_SPLIT:
+      splitblock = 0;
+      break;
+    case BLOSC_AUTO_SPLIT:
+      /* Normally all the compressors designed for speed benefit from a
+         split. However, in conducted benchmarks LZ4 seems to run
+         faster if we don't split, which is quite surprising.
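+         (Editorial example: under BLOSC_AUTO_SPLIT a blosclz or
+         snappy stream with typesize 4 and a 64 KB block is split into
+         four 16 KB byte planes, while LZ4, zlib and zstd blocks are
+         left whole; under the default BLOSC_FORWARD_COMPAT_SPLIT mode
+         everything but zstd still splits, as the next case explains.)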
*/
+      splitblock = (((compcode == BLOSC_BLOSCLZ) ||
+                     (compcode == BLOSC_SNAPPY)) &&
+                    (typesize <= MAX_SPLITS) &&
+                    (blocksize / typesize) >= MIN_BUFFERSIZE);
+      break;
+    case BLOSC_FORWARD_COMPAT_SPLIT:
+      /* The zstd support was introduced at the same time as the split flag, so
+       * there should not be a problem with not splitting blocks with it */
+      splitblock = ((compcode != BLOSC_ZSTD) &&
+                    (typesize <= MAX_SPLITS) &&
+                    (blocksize / typesize) >= MIN_BUFFERSIZE);
+      break;
+    default:
+      fprintf(stderr, "Split mode %d not supported", g_splitmode);
+  }
+  return splitblock;
+}
+
+
+static int32_t compute_blocksize(struct blosc_context* context, int32_t clevel,
+                                 int32_t typesize, int32_t nbytes,
+                                 int32_t forced_blocksize)
+{
+  int32_t blocksize;
+
+  /* Protection against very small buffers */
+  if (nbytes < (int32_t)typesize) {
+    return 1;
+  }
+
+  blocksize = nbytes;           /* Start with the whole buffer as blocksize */
+
+  if (forced_blocksize) {
+    blocksize = forced_blocksize;
+    /* Check that forced blocksize is not too small */
+    if (blocksize < MIN_BUFFERSIZE) {
+      blocksize = MIN_BUFFERSIZE;
+    }
+    /* Check that forced blocksize is not too large */
+    if (blocksize > BLOSC_MAX_BLOCKSIZE) {
+      blocksize = BLOSC_MAX_BLOCKSIZE;
+    }
+  }
+  else if (nbytes >= L1) {
+    blocksize = L1;
+
+    /* For HCR codecs, increase the block sizes by a factor of 2 because they
+       are meant for compressing large blocks (i.e. they show a big overhead
+       when compressing small ones). */
+    if (HCR(context->compcode)) {
+      blocksize *= 2;
+    }
+
+    switch (clevel) {
+      case 0:
+        /* Case of plain copy */
+        blocksize /= 4;
+        break;
+      case 1:
+        blocksize /= 2;
+        break;
+      case 2:
+        blocksize *= 1;
+        break;
+      case 3:
+        blocksize *= 2;
+        break;
+      case 4:
+      case 5:
+        blocksize *= 4;
+        break;
+      case 6:
+      case 7:
+      case 8:
+        blocksize *= 8;
+        break;
+      case 9:
+        blocksize *= 8;
+        if (HCR(context->compcode)) {
+          blocksize *= 2;
+        }
+        break;
+      default:
+        assert(0);
+        break;
+    }
+  }
+
+  /* Enlarge the blocksize for splittable codecs */
+  if (clevel > 0 && split_block(context->compcode, typesize, blocksize)) {
+    if (blocksize > (1 << 18)) {
+      /* Do not use too large a split buffer (> 256 KB) for splitting codecs */
+      blocksize = (1 << 18);
+    }
+    blocksize *= typesize;
+    if (blocksize < (1 << 16)) {
+      /* Do not use too small a blocksize (< 64 KB) when typesize is small */
+      blocksize = (1 << 16);
+    }
+    if (blocksize > 1024 * 1024) {
+      /* But do not exceed 1 MB per thread (having this capacity in L3 is normal in modern CPUs) */
+      blocksize = 1024 * 1024;
+    }
+
+  }
+
+  /* Check that blocksize is not too large */
+  if (blocksize > (int32_t)nbytes) {
+    blocksize = nbytes;
+  }
+
+  /* blocksize *must absolutely* be a multiple of the typesize */
+  if (blocksize > typesize) {
+    blocksize = blocksize / typesize * typesize;
+  }
+
+  return blocksize;
+}
+
+static int initialize_context_compression(struct blosc_context* context,
+                                          int clevel,
+                                          int doshuffle,
+                                          size_t typesize,
+                                          size_t sourcesize,
+                                          const void* src,
+                                          void* dest,
+                                          size_t destsize,
+                                          int32_t compressor,
+                                          int32_t blocksize,
+                                          int32_t numthreads)
+{
+  char *envvar = NULL;
+  int warnlvl = 0;
+  /* Set parameters */
+  context->compress = 1;
+  context->src = (const uint8_t*)src;
+  context->dest = (uint8_t *)(dest);
+  context->num_output_bytes = 0;
+  context->destsize = (int32_t)destsize;
+  context->sourcesize = sourcesize;
+  context->typesize = typesize;
+  context->compcode = compressor;
+  context->numthreads = numthreads;
+  context->end_threads = 0;
+  context->clevel = clevel;
+
+  envvar =
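+  /* (Editorial example for compute_blocksize above, assuming the
+     default split mode: a 10 MB buffer of 4-byte items at clevel 5
+     with blosclz starts from L1 = 32 KB, is scaled by 4 for clevel 5
+     to 128 KB and, because such a block gets split, is multiplied by
+     the typesize to a final 512 KB, inside both the 256 KB
+     pre-multiply clamp and the 1 MB post-multiply cap.) */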
+
+
+static int32_t compute_blocksize(struct blosc_context* context, int32_t clevel,
+                                 int32_t typesize, int32_t nbytes,
+                                 int32_t forced_blocksize)
+{
+  int32_t blocksize;
+
+  /* Protection against very small buffers */
+  if (nbytes < (int32_t)typesize) {
+    return 1;
+  }
+
+  blocksize = nbytes;       /* Start with the whole buffer as the blocksize */
+
+  if (forced_blocksize) {
+    blocksize = forced_blocksize;
+    /* Check that the forced blocksize is not too small */
+    if (blocksize < MIN_BUFFERSIZE) {
+      blocksize = MIN_BUFFERSIZE;
+    }
+    /* Check that the forced blocksize is not too large */
+    if (blocksize > BLOSC_MAX_BLOCKSIZE) {
+      blocksize = BLOSC_MAX_BLOCKSIZE;
+    }
+  }
+  else if (nbytes >= L1) {
+    blocksize = L1;
+
+    /* For HCR codecs, increase the block sizes by a factor of 2 because they
+       are meant for compressing large blocks (i.e. they show a big overhead
+       when compressing small ones). */
+    if (HCR(context->compcode)) {
+      blocksize *= 2;
+    }
+
+    switch (clevel) {
+      case 0:
+        /* Case of plain copy */
+        blocksize /= 4;
+        break;
+      case 1:
+        blocksize /= 2;
+        break;
+      case 2:
+        blocksize *= 1;
+        break;
+      case 3:
+        blocksize *= 2;
+        break;
+      case 4:
+      case 5:
+        blocksize *= 4;
+        break;
+      case 6:
+      case 7:
+      case 8:
+        blocksize *= 8;
+        break;
+      case 9:
+        blocksize *= 8;
+        if (HCR(context->compcode)) {
+          blocksize *= 2;
+        }
+        break;
+      default:
+        assert(0);
+        break;
+    }
+  }
+
+  /* Enlarge the blocksize for splittable codecs */
+  if (clevel > 0 && split_block(context->compcode, typesize, blocksize)) {
+    if (blocksize > (1 << 18)) {
+      /* Do not use a too large split buffer (> 256 KB) for splitting codecs */
+      blocksize = (1 << 18);
+    }
+    blocksize *= typesize;
+    if (blocksize < (1 << 16)) {
+      /* Do not use a too small blocksize (< 64 KB) when typesize is small */
+      blocksize = (1 << 16);
+    }
+    if (blocksize > 1024 * 1024) {
+      /* But do not exceed 1 MB per thread (having this capacity in L3 is
+         normal in modern CPUs) */
+      blocksize = 1024 * 1024;
+    }
+  }
+
+  /* Check that blocksize is not too large */
+  if (blocksize > (int32_t)nbytes) {
+    blocksize = nbytes;
+  }
+
+  /* blocksize *must absolutely* be a multiple of the typesize */
+  if (blocksize > typesize) {
+    blocksize = blocksize / typesize * typesize;
+  }
+
+  return blocksize;
+}
+
+static int initialize_context_compression(struct blosc_context* context,
+                                          int clevel,
+                                          int doshuffle,
+                                          size_t typesize,
+                                          size_t sourcesize,
+                                          const void* src,
+                                          void* dest,
+                                          size_t destsize,
+                                          int32_t compressor,
+                                          int32_t blocksize,
+                                          int32_t numthreads)
+{
+  char *envvar = NULL;
+  int warnlvl = 0;
+  /* Set parameters */
+  context->compress = 1;
+  context->src = (const uint8_t*)src;
+  context->dest = (uint8_t *)(dest);
+  context->num_output_bytes = 0;
+  context->destsize = (int32_t)destsize;
+  context->sourcesize = sourcesize;
+  context->typesize = typesize;
+  context->compcode = compressor;
+  context->numthreads = numthreads;
+  context->end_threads = 0;
+  context->clevel = clevel;
+
+  envvar = getenv("BLOSC_WARN");
+  if (envvar != NULL) {
+    warnlvl = strtol(envvar, NULL, 10);
+  }
+
+  /* Check buffer size limits */
+  if (sourcesize > BLOSC_MAX_BUFFERSIZE) {
+    if (warnlvl > 0) {
+      fprintf(stderr, "Input buffer size cannot exceed %d bytes\n",
+              BLOSC_MAX_BUFFERSIZE);
+    }
+    return 0;
+  }
+  if (destsize < BLOSC_MAX_OVERHEAD) {
+    if (warnlvl > 0) {
+      fprintf(stderr, "Output buffer size must be at least %d bytes\n",
+              BLOSC_MAX_OVERHEAD);
+    }
+    return 0;
+  }
+
+  /* Compression level */
+  if (clevel < 0 || clevel > 9) {
+    fprintf(stderr, "`clevel` parameter must be between 0 and 9!\n");
+    return -10;
+  }
+
+  /* Shuffle */
+  if (doshuffle != 0 && doshuffle != 1 && doshuffle != 2) {
+    fprintf(stderr, "`shuffle` parameter must be either 0, 1 or 2!\n");
+    return -10;
+  }
+
+  /* Check typesize limits */
+  if (context->typesize > BLOSC_MAX_TYPESIZE) {
+    /* If typesize is too large, treat the buffer as a 1-byte stream. */
+    context->typesize = 1;
+  }
+
+  /* Get the blocksize */
+  context->blocksize = compute_blocksize(context, clevel, (int32_t)context->typesize,
+                                         context->sourcesize, blocksize);
+
+  /* Compute number of blocks in buffer */
+  context->nblocks = context->sourcesize / context->blocksize;
+  context->leftover = context->sourcesize % context->blocksize;
+  context->nblocks = (context->leftover > 0) ? (context->nblocks + 1) : context->nblocks;
+
+  return 1;
+}
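+
+/*
+   Editor's note (illustrative sketch, not part of the upstream sources):
+   when the user forces a blocksize, compute_blocksize() is largely
+   bypassed, apart from the clamps to MIN_BUFFERSIZE / BLOSC_MAX_BLOCKSIZE
+   and the rounding to a multiple of the typesize:
+
+     #include "blosc.h"
+
+     static void force_block(void) {
+       blosc_set_blocksize(64 * 1024);   // force 64 KB blocks
+       // ... blosc_compress() calls now use this blocksize ...
+       blosc_set_blocksize(0);           // back to automatic selection
+     }
+*/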
+
+
+static int write_compression_header(struct blosc_context* context, int clevel, int doshuffle)
+{
+  int32_t compformat;
+  int dont_split;
+
+  /* Write version header for this block */
+  context->dest[0] = BLOSC_VERSION_FORMAT;              /* blosc format version */
+
+  /* Write compressor format */
+  compformat = -1;
+  switch (context->compcode)
+  {
+    case BLOSC_BLOSCLZ:
+      compformat = BLOSC_BLOSCLZ_FORMAT;
+      context->dest[1] = BLOSC_BLOSCLZ_VERSION_FORMAT;  /* blosclz format version */
+      break;
+
+#if defined(HAVE_LZ4)
+    case BLOSC_LZ4:
+      compformat = BLOSC_LZ4_FORMAT;
+      context->dest[1] = BLOSC_LZ4_VERSION_FORMAT;      /* lz4 format version */
+      break;
+    case BLOSC_LZ4HC:
+      compformat = BLOSC_LZ4HC_FORMAT;
+      context->dest[1] = BLOSC_LZ4HC_VERSION_FORMAT;    /* lz4hc is the same as lz4 */
+      break;
+#endif /* HAVE_LZ4 */
+
+#if defined(HAVE_SNAPPY)
+    case BLOSC_SNAPPY:
+      compformat = BLOSC_SNAPPY_FORMAT;
+      context->dest[1] = BLOSC_SNAPPY_VERSION_FORMAT;   /* snappy format version */
+      break;
+#endif /* HAVE_SNAPPY */
+
+#if defined(HAVE_ZLIB)
+    case BLOSC_ZLIB:
+      compformat = BLOSC_ZLIB_FORMAT;
+      context->dest[1] = BLOSC_ZLIB_VERSION_FORMAT;     /* zlib format version */
+      break;
+#endif /* HAVE_ZLIB */
+
+#if defined(HAVE_ZSTD)
+    case BLOSC_ZSTD:
+      compformat = BLOSC_ZSTD_FORMAT;
+      context->dest[1] = BLOSC_ZSTD_VERSION_FORMAT;     /* zstd format version */
+      break;
+#endif /* HAVE_ZSTD */
+
+    default:
+    {
+      const char *compname;
+      compname = clibcode_to_clibname(compformat);
+      if (compname == NULL) {
+        compname = "(null)";
+      }
+      fprintf(stderr, "Blosc has not been compiled with '%s' ", compname);
+      fprintf(stderr, "compression support.  Please use a build that includes it.\n");
+      return -5;    /* signals no compression support */
+      break;
+    }
+  }
+
+  context->header_flags = context->dest+2;             /* flags */
+  context->dest[2] = 0;                                /* zeroes flags */
+  context->dest[3] = (uint8_t)context->typesize;       /* type size */
+  _sw32(context->dest + 4, context->sourcesize);       /* size of the buffer */
+  _sw32(context->dest + 8, context->blocksize);        /* block size */
+  context->bstarts = context->dest + 16;               /* starts for every block */
+  context->num_output_bytes = 16 + sizeof(int32_t)*context->nblocks;  /* space for header and pointers */
+
+  if (context->clevel == 0) {
+    /* Compression level 0 means buffer to be memcpy'ed */
+    *(context->header_flags) |= BLOSC_MEMCPYED;
+    context->num_output_bytes = 16;      /* space just for header */
+  }
+
+  if (context->sourcesize < MIN_BUFFERSIZE) {
+    /* Buffer is too small.  Try memcpy'ing. */
+    *(context->header_flags) |= BLOSC_MEMCPYED;
+    context->num_output_bytes = 16;      /* space just for header */
+  }
+
+  if (doshuffle == BLOSC_SHUFFLE) {
+    /* Byte-shuffle is active */
+    *(context->header_flags) |= BLOSC_DOSHUFFLE;       /* bit 0 set to one in flags */
+  }
+
+  if (doshuffle == BLOSC_BITSHUFFLE) {
+    /* Bit-shuffle is active */
+    *(context->header_flags) |= BLOSC_DOBITSHUFFLE;    /* bit 2 set to one in flags */
+  }
+
+  dont_split = !split_block(context->compcode, context->typesize,
+                            context->blocksize);
+  *(context->header_flags) |= dont_split << 4;         /* dont_split is in bit 4 */
+  *(context->header_flags) |= compformat << 5;         /* compressor format starts at bit 5 */
+
+  return 1;
+}
+
+
+int blosc_compress_context(struct blosc_context* context)
+{
+  int32_t ntbytes = 0;
+
+  if ((*(context->header_flags) & BLOSC_MEMCPYED) &&
+      (context->sourcesize + BLOSC_MAX_OVERHEAD > context->destsize)) {
+    return 0;    /* data cannot be copied without overrunning the destination */
+  }
+
+  /* Do the actual compression */
+  ntbytes = do_job(context);
+  if (ntbytes < 0) {
+    return -1;
+  }
+  if ((ntbytes == 0) && (context->sourcesize + BLOSC_MAX_OVERHEAD <= context->destsize)) {
+    /* Last chance for fitting the `src` buffer in `dest`.  Update flags and force a copy. */
+    *(context->header_flags) |= BLOSC_MEMCPYED;
+    context->num_output_bytes = BLOSC_MAX_OVERHEAD;    /* reset the output bytes from the previous step */
+    ntbytes = do_job(context);
+    if (ntbytes < 0) {
+      return -1;
+    }
+  }
+
+  /* Set the number of compressed bytes in header */
+  _sw32(context->dest + 12, ntbytes);
+
+  assert(ntbytes <= context->destsize);
+  return ntbytes;
+}
+
+/* The public routine for compression with context. */
+int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize,
+                       size_t nbytes, const void* src, void* dest,
+                       size_t destsize, const char* compressor,
+                       size_t blocksize, int numinternalthreads)
+{
+  int error, result;
+  struct blosc_context context;
+
+  context.threads_started = 0;
+  error = initialize_context_compression(&context, clevel, doshuffle, typesize,
+                                         nbytes, src, dest, destsize,
+                                         blosc_compname_to_compcode(compressor),
+                                         blocksize, numinternalthreads);
+  if (error <= 0) { return error; }
+
+  error = write_compression_header(&context, clevel, doshuffle);
+  if (error <= 0) { return error; }
+
+  result = blosc_compress_context(&context);
+
+  if (numinternalthreads > 1)
+  {
+    blosc_release_threadpool(&context);
+  }
+
+  return result;
+}
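+
+/*
+   Editor's note (illustrative sketch, not part of the upstream sources):
+   a minimal lock-free round trip with the context interface just above,
+   which needs no blosc_init()/blosc_destroy() and is safe to call from
+   several threads at once.  The roundtrip() helper is hypothetical:
+
+     #include <string.h>
+     #include "blosc.h"
+
+     static int roundtrip(void) {
+       double data[1000] = {0};
+       char packed[sizeof(data) + BLOSC_MAX_OVERHEAD];
+       double out[1000];
+       int csize = blosc_compress_ctx(5, BLOSC_SHUFFLE, sizeof(double),
+                                      sizeof(data), data, packed,
+                                      sizeof(packed), "blosclz", 0, 1);
+       if (csize <= 0) return -1;       // 0 would mean incompressible
+       int dsize = blosc_decompress_ctx(packed, out, sizeof(out), 1);
+       return (dsize == (int)sizeof(data)) ? 0 : -1;
+     }
+*/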
+
+/* The public routine for compression.  See blosc.h for docstrings. */
+int blosc_compress(int clevel, int doshuffle, size_t typesize, size_t nbytes,
+                   const void *src, void *dest, size_t destsize)
+{
+  int result;
+  char* envvar;
+
+  /* Check whether we should initialize */
+  if (!g_initlib) blosc_init();
+
+  /* Check for environment variables */
+  envvar = getenv("BLOSC_CLEVEL");
+  if (envvar != NULL) {
+    long value;
+    value = strtol(envvar, NULL, 10);
+    if ((value != EINVAL) && (value >= 0)) {
+      clevel = (int)value;
+    }
+  }
+
+  envvar = getenv("BLOSC_SHUFFLE");
+  if (envvar != NULL) {
+    if (strcmp(envvar, "NOSHUFFLE") == 0) {
+      doshuffle = BLOSC_NOSHUFFLE;
+    }
+    if (strcmp(envvar, "SHUFFLE") == 0) {
+      doshuffle = BLOSC_SHUFFLE;
+    }
+    if (strcmp(envvar, "BITSHUFFLE") == 0) {
+      doshuffle = BLOSC_BITSHUFFLE;
+    }
+  }
+
+  envvar = getenv("BLOSC_TYPESIZE");
+  if (envvar != NULL) {
+    long value;
+    value = strtol(envvar, NULL, 10);
+    if ((value != EINVAL) && (value > 0)) {
+      typesize = (int)value;
+    }
+  }
+
+  envvar = getenv("BLOSC_COMPRESSOR");
+  if (envvar != NULL) {
+    result = blosc_set_compressor(envvar);
+    if (result < 0) { return result; }
+  }
+
+  envvar = getenv("BLOSC_BLOCKSIZE");
+  if (envvar != NULL) {
+    long blocksize;
+    blocksize = strtol(envvar, NULL, 10);
+    if ((blocksize != EINVAL) && (blocksize > 0)) {
+      blosc_set_blocksize((size_t)blocksize);
+    }
+  }
+
+  envvar = getenv("BLOSC_NTHREADS");
+  if (envvar != NULL) {
+    long nthreads;
+    nthreads = strtol(envvar, NULL, 10);
+    if ((nthreads != EINVAL) && (nthreads > 0)) {
+      result = blosc_set_nthreads((int)nthreads);
+      if (result < 0) { return result; }
+    }
+  }
+
+  envvar = getenv("BLOSC_SPLITMODE");
+  if (envvar != NULL) {
+    if (strcmp(envvar, "FORWARD_COMPAT") == 0) {
+      blosc_set_splitmode(BLOSC_FORWARD_COMPAT_SPLIT);
+    }
+    else if (strcmp(envvar, "AUTO") == 0) {
+      blosc_set_splitmode(BLOSC_AUTO_SPLIT);
+    }
+    else if (strcmp(envvar, "ALWAYS") == 0) {
+      blosc_set_splitmode(BLOSC_ALWAYS_SPLIT);
+    }
+    else if (strcmp(envvar, "NEVER") == 0) {
+      blosc_set_splitmode(BLOSC_NEVER_SPLIT);
+    }
+    else {
+      fprintf(stderr, "BLOSC_SPLITMODE environment variable '%s' not recognized\n", envvar);
+      return -1;
+    }
+  }
+
+  /* Check for a BLOSC_NOLOCK environment variable.
It is important + that this should be the last env var so that it can take the + previous ones into account */ + envvar = getenv("BLOSC_NOLOCK"); + if (envvar != NULL) { + const char *compname; + blosc_compcode_to_compname(g_compressor, &compname); + result = blosc_compress_ctx(clevel, doshuffle, typesize, + nbytes, src, dest, destsize, + compname, g_force_blocksize, g_threads); + return result; + } + + pthread_mutex_lock(global_comp_mutex); + + do { + result = initialize_context_compression(g_global_context, clevel, doshuffle, + typesize, nbytes, src, dest, destsize, + g_compressor, g_force_blocksize, + g_threads); + if (result <= 0) { break; } + + result = write_compression_header(g_global_context, clevel, doshuffle); + if (result <= 0) { break; } + + result = blosc_compress_context(g_global_context); + } while (0); + + pthread_mutex_unlock(global_comp_mutex); + + return result; +} + +static int blosc_run_decompression_with_context(struct blosc_context* context, + const void* src, + void* dest, + size_t destsize, + int numinternalthreads) +{ + uint8_t version; + int32_t ntbytes; + + context->compress = 0; + context->src = (const uint8_t*)src; + context->dest = (uint8_t*)dest; + context->destsize = destsize; + context->num_output_bytes = 0; + context->numthreads = numinternalthreads; + context->end_threads = 0; + + /* Read the header block */ + version = context->src[0]; /* blosc format version */ + context->compversion = context->src[1]; + + context->header_flags = (uint8_t*)(context->src + 2); /* flags */ + context->typesize = (int32_t)context->src[3]; /* typesize */ + context->sourcesize = sw32_(context->src + 4); /* buffer size */ + context->blocksize = sw32_(context->src + 8); /* block size */ + context->compressedsize = sw32_(context->src + 12); /* compressed buffer size */ + context->bstarts = (uint8_t*)(context->src + 16); + + if (context->sourcesize == 0) { + /* Source buffer was empty, so we are done */ + return 0; + } + + if (context->blocksize <= 0 || context->blocksize > destsize || + context->blocksize > BLOSC_MAX_BLOCKSIZE || context->typesize <= 0 || + context->typesize > BLOSC_MAX_TYPESIZE) { + return -1; + } + + if (version != BLOSC_VERSION_FORMAT) { + /* Version from future */ + return -1; + } + if (*context->header_flags & 0x08) { + /* compressor flags from the future */ + return -1; + } + + /* Compute some params */ + /* Total blocks */ + context->nblocks = context->sourcesize / context->blocksize; + context->leftover = context->sourcesize % context->blocksize; + context->nblocks = (context->leftover>0)? context->nblocks+1: context->nblocks; + + /* Check that we have enough space to decompress */ + if (context->sourcesize > (int32_t)destsize) { + return -1; + } + + if (*(context->header_flags) & BLOSC_MEMCPYED) { + /* Validate that compressed size is equal to decompressed size + header + size. 
*/ + if (context->sourcesize + BLOSC_MAX_OVERHEAD != context->compressedsize) { + return -1; + } + } else { + ntbytes = initialize_decompress_func(context); + if (ntbytes != 0) return ntbytes; + + /* Validate that compressed size is large enough to hold the bstarts array */ + if (context->nblocks > (context->compressedsize - 16) / 4) { + return -1; + } + } + + /* Do the actual decompression */ + ntbytes = do_job(context); + if (ntbytes < 0) { + return -1; + } + + assert(ntbytes <= (int32_t)destsize); + return ntbytes; +} + +int blosc_decompress_ctx(const void* src, void* dest, size_t destsize, + int numinternalthreads) { + int result; + struct blosc_context context; + + context.threads_started = 0; + result = blosc_run_decompression_with_context(&context, src, dest, destsize, + numinternalthreads); + + if (numinternalthreads > 1) + { + blosc_release_threadpool(&context); + } + + return result; +} + +int blosc_decompress(const void* src, void* dest, size_t destsize) { + int result; + char* envvar; + long nthreads; + + /* Check if should initialize */ + if (!g_initlib) blosc_init(); + + /* Check for a BLOSC_NTHREADS environment variable */ + envvar = getenv("BLOSC_NTHREADS"); + if (envvar != NULL) { + nthreads = strtol(envvar, NULL, 10); + if ((nthreads != EINVAL) && (nthreads > 0)) { + result = blosc_set_nthreads((int)nthreads); + if (result < 0) { return result; } + } + } + + /* Check for a BLOSC_NOLOCK environment variable. It is important + that this should be the last env var so that it can take the + previous ones into account */ + envvar = getenv("BLOSC_NOLOCK"); + if (envvar != NULL) { + result = blosc_decompress_ctx(src, dest, destsize, g_threads); + return result; + } + + pthread_mutex_lock(global_comp_mutex); + + result = blosc_run_decompression_with_context(g_global_context, src, dest, + destsize, g_threads); + + pthread_mutex_unlock(global_comp_mutex); + + return result; +} + +int blosc_getitem(const void* src, int start, int nitems, void* dest) { + uint8_t *_src=NULL; /* current pos for source buffer */ + uint8_t version, compversion; /* versions for compressed header */ + uint8_t flags; /* flags for header */ + int32_t ntbytes = 0; /* the number of uncompressed bytes */ + int32_t nblocks; /* number of total blocks in buffer */ + int32_t leftover; /* extra bytes at end of buffer */ + uint8_t *bstarts; /* start pointers for each block */ + int32_t typesize, blocksize, nbytes, compressedsize; + int32_t j, bsize, bsize2, leftoverblock; + int32_t cbytes, startb, stopb; + int stop = start + nitems; + uint8_t *tmp; + uint8_t *tmp2; + uint8_t *tmp3; + int32_t ebsize; + struct blosc_context context = {0}; + + _src = (uint8_t *)(src); + + /* Read the header block */ + version = _src[0]; /* blosc format version */ + compversion = _src[1]; + flags = _src[2]; /* flags */ + typesize = (int32_t)_src[3]; /* typesize */ + nbytes = sw32_(_src + 4); /* buffer size */ + blocksize = sw32_(_src + 8); /* block size */ + compressedsize = sw32_(_src + 12); /* compressed buffer size */ + + if (version != BLOSC_VERSION_FORMAT) + return -9; + + if (blocksize <= 0 || blocksize > nbytes || blocksize > BLOSC_MAX_BLOCKSIZE || + typesize <= 0 || typesize > BLOSC_MAX_TYPESIZE) { + return -1; + } + + /* Compute some params */ + /* Total blocks */ + nblocks = nbytes / blocksize; + leftover = nbytes % blocksize; + nblocks = (leftover>0)? 
nblocks+1: nblocks;
+
+  /* Only initialize the fields blosc_d uses */
+  context.typesize = typesize;
+  context.header_flags = &flags;
+  context.compversion = compversion;
+  context.compressedsize = compressedsize;
+  if (flags & BLOSC_MEMCPYED) {
+    if (nbytes + BLOSC_MAX_OVERHEAD != compressedsize) {
+      return -1;
+    }
+  } else {
+    ntbytes = initialize_decompress_func(&context);
+    if (ntbytes != 0) return ntbytes;
+
+    if (nblocks >= (compressedsize - 16) / 4) {
+      return -1;
+    }
+  }
+
+  ebsize = blocksize + typesize * (int32_t)sizeof(int32_t);
+  tmp = my_malloc(blocksize + ebsize + blocksize);
+  tmp2 = tmp + blocksize;
+  tmp3 = tmp + blocksize + ebsize;
+
+  _src += 16;
+  bstarts = _src;
+  _src += sizeof(int32_t)*nblocks;
+
+  /* Check region boundaries */
+  if ((start < 0) || (start*typesize > nbytes)) {
+    fprintf(stderr, "`start` out of bounds\n");
+    return -1;
+  }
+
+  if ((stop < 0) || (stop*typesize > nbytes)) {
+    fprintf(stderr, "`start`+`nitems` out of bounds\n");
+    return -1;
+  }
+
+  for (j = 0; j < nblocks; j++) {
+    bsize = blocksize;
+    leftoverblock = 0;
+    if ((j == nblocks - 1) && (leftover > 0)) {
+      bsize = leftover;
+      leftoverblock = 1;
+    }
+
+    /* Compute start & stop for each block */
+    startb = start * typesize - j * blocksize;
+    stopb = stop * typesize - j * blocksize;
+    if ((startb >= (int)blocksize) || (stopb <= 0)) {
+      continue;
+    }
+    if (startb < 0) {
+      startb = 0;
+    }
+    if (stopb > (int)blocksize) {
+      stopb = blocksize;
+    }
+    bsize2 = stopb - startb;
+
+    /* Do the actual data copy */
+    if (flags & BLOSC_MEMCPYED) {
+      /* We want to memcpy only */
+      fastcopy((uint8_t *) dest + ntbytes,
+               (uint8_t *) src + BLOSC_MAX_OVERHEAD + j * blocksize + startb, bsize2);
+      cbytes = bsize2;
+    }
+    else {
+      /* Regular decompression.  Put results in tmp2. */
+      cbytes = blosc_d(&context, bsize, leftoverblock,
+                       (uint8_t *)src, sw32_(bstarts + j * 4),
+                       tmp2, tmp, tmp3);
+      if (cbytes < 0) {
+        ntbytes = cbytes;
+        break;
+      }
+      /* Copy to destination */
+      fastcopy((uint8_t *) dest + ntbytes, tmp2 + startb, bsize2);
+      cbytes = bsize2;
+    }
+    ntbytes += cbytes;
+  }
+
+  my_free(tmp);
+
+  return ntbytes;
+}
+
+/* Compress/decompress & shuffle/unshuffle several blocks in a single thread */
+static void *t_blosc(void *ctxt)
+{
+  struct thread_context* context = (struct thread_context*)ctxt;
+  int32_t cbytes, ntdest;
+  int32_t tblocks;              /* number of blocks per thread */
+  int32_t leftover2;
+  int32_t tblock;               /* limit block on a thread */
+  int32_t nblock_;              /* private copy of nblock */
+  int32_t bsize, leftoverblock;
+  /* Parameters for threads */
+  int32_t blocksize;
+  int32_t ebsize;
+  int32_t compress;
+  int32_t maxbytes;
+  int32_t ntbytes;
+  int32_t flags;
+  int32_t nblocks;
+  int32_t leftover;
+  uint8_t *bstarts;
+  const uint8_t *src;
+  uint8_t *dest;
+  uint8_t *tmp;
+  uint8_t *tmp2;
+  uint8_t *tmp3;
+  int rc;
+  (void)rc;  // just to avoid 'unused-variable' warning
+
+  while(1)
+  {
+    /* Synchronization point for all threads (wait for initialization) */
+    WAIT_INIT(NULL, context->parent_context);
+
+    if(context->parent_context->end_threads)
+    {
+      break;
+    }
+
+    /* Get parameters for this thread before entering the main loop */
+    blocksize = context->parent_context->blocksize;
+    ebsize = blocksize + context->parent_context->typesize * (int32_t)sizeof(int32_t);
+    compress = context->parent_context->compress;
+    flags = *(context->parent_context->header_flags);
+    maxbytes = context->parent_context->destsize;
+    nblocks = context->parent_context->nblocks;
+    leftover = context->parent_context->leftover;
+    bstarts =
context->parent_context->bstarts; + src = context->parent_context->src; + dest = context->parent_context->dest; + + if (blocksize > context->tmpblocksize) + { + my_free(context->tmp); + context->tmp = my_malloc(blocksize + ebsize + blocksize); + context->tmp2 = context->tmp + blocksize; + context->tmp3 = context->tmp + blocksize + ebsize; + } + + tmp = context->tmp; + tmp2 = context->tmp2; + tmp3 = context->tmp3; + + ntbytes = 0; /* only useful for decompression */ + + if (compress && !(flags & BLOSC_MEMCPYED)) { + /* Compression always has to follow the block order */ + pthread_mutex_lock(&context->parent_context->count_mutex); + context->parent_context->thread_nblock++; + nblock_ = context->parent_context->thread_nblock; + pthread_mutex_unlock(&context->parent_context->count_mutex); + tblock = nblocks; + } + else { + /* Decompression can happen using any order. We choose + sequential block order on each thread */ + + /* Blocks per thread */ + tblocks = nblocks / context->parent_context->numthreads; + leftover2 = nblocks % context->parent_context->numthreads; + tblocks = (leftover2>0)? tblocks+1: tblocks; + + nblock_ = context->tid*tblocks; + tblock = nblock_ + tblocks; + if (tblock > nblocks) { + tblock = nblocks; + } + } + + /* Loop over blocks */ + leftoverblock = 0; + while ((nblock_ < tblock) && context->parent_context->thread_giveup_code > 0) { + bsize = blocksize; + if (nblock_ == (nblocks - 1) && (leftover > 0)) { + bsize = leftover; + leftoverblock = 1; + } + if (compress) { + if (flags & BLOSC_MEMCPYED) { + /* We want to memcpy only */ + fastcopy(dest + BLOSC_MAX_OVERHEAD + nblock_ * blocksize, + src + nblock_ * blocksize, bsize); + cbytes = bsize; + } + else { + /* Regular compression */ + cbytes = blosc_c(context->parent_context, bsize, leftoverblock, 0, ebsize, + src+nblock_*blocksize, tmp2, tmp, tmp3); + } + } + else { + if (flags & BLOSC_MEMCPYED) { + /* We want to memcpy only */ + fastcopy(dest + nblock_ * blocksize, + src + BLOSC_MAX_OVERHEAD + nblock_ * blocksize, bsize); + cbytes = bsize; + } + else { + cbytes = blosc_d(context->parent_context, bsize, leftoverblock, + src, sw32_(bstarts + nblock_ * 4), + dest+nblock_*blocksize, + tmp, tmp2); + } + } + + /* Check whether current thread has to giveup */ + if (context->parent_context->thread_giveup_code <= 0) { + break; + } + + /* Check results for the compressed/decompressed block */ + if (cbytes < 0) { /* compr/decompr failure */ + /* Set giveup_code error */ + pthread_mutex_lock(&context->parent_context->count_mutex); + context->parent_context->thread_giveup_code = cbytes; + pthread_mutex_unlock(&context->parent_context->count_mutex); + break; + } + + if (compress && !(flags & BLOSC_MEMCPYED)) { + /* Start critical section */ + pthread_mutex_lock(&context->parent_context->count_mutex); + ntdest = context->parent_context->num_output_bytes; + _sw32(bstarts + nblock_ * 4, ntdest); /* update block start counter */ + if ( (cbytes == 0) || (ntdest+cbytes > maxbytes) ) { + context->parent_context->thread_giveup_code = 0; /* incompressible buffer */ + pthread_mutex_unlock(&context->parent_context->count_mutex); + break; + } + context->parent_context->thread_nblock++; + nblock_ = context->parent_context->thread_nblock; + context->parent_context->num_output_bytes += cbytes; /* update return bytes counter */ + pthread_mutex_unlock(&context->parent_context->count_mutex); + /* End of critical section */ + + /* Copy the compressed buffer to destination */ + fastcopy(dest + ntdest, tmp2, cbytes); + } + else { + nblock_++; + /* Update 
counter for this thread */
+        ntbytes += cbytes;
+      }
+
+    } /* closes while (nblock_) */
+
+    /* Sum up all the bytes decompressed */
+    if ((!compress || (flags & BLOSC_MEMCPYED)) && context->parent_context->thread_giveup_code > 0) {
+      /* Update global counter for all threads (decompression only) */
+      pthread_mutex_lock(&context->parent_context->count_mutex);
+      context->parent_context->num_output_bytes += ntbytes;
+      pthread_mutex_unlock(&context->parent_context->count_mutex);
+    }
+
+    /* Meeting point for all threads (wait for finalization) */
+    WAIT_FINISH(NULL, context->parent_context);
+  }
+
+  /* Cleanup our working space and context */
+  my_free(context->tmp);
+  my_free(context);
+
+  return(NULL);
+}
+
+
+static int init_threads(struct blosc_context* context)
+{
+  int32_t tid;
+  int rc2;
+  int32_t ebsize;
+  struct thread_context* thread_context;
+
+  /* Initialize mutex and condition variable objects */
+  pthread_mutex_init(&context->count_mutex, NULL);
+
+  /* Set context thread sentinels */
+  context->thread_giveup_code = 1;
+  context->thread_nblock = -1;
+
+  /* Barrier initialization */
+#ifdef _POSIX_BARRIERS_MINE
+  pthread_barrier_init(&context->barr_init, NULL, context->numthreads+1);
+  pthread_barrier_init(&context->barr_finish, NULL, context->numthreads+1);
+#else
+  pthread_mutex_init(&context->count_threads_mutex, NULL);
+  pthread_cond_init(&context->count_threads_cv, NULL);
+  context->count_threads = 0;      /* Reset threads counter */
+#endif
+
+#if !defined(_WIN32)
+  /* Initialize and set the thread joinable attribute */
+  pthread_attr_init(&context->ct_attr);
+  pthread_attr_setdetachstate(&context->ct_attr, PTHREAD_CREATE_JOINABLE);
+#endif
+
+  /* Finally, create the threads (joinable, so the pool can be released
+     by joining them later on) */
+  for (tid = 0; tid < context->numthreads; tid++) {
+    context->tids[tid] = tid;
+
+    /* Create a thread context; the thread owns it (and will destroy it
+       when finished) */
+    thread_context = (struct thread_context*)my_malloc(sizeof(struct thread_context));
+    thread_context->parent_context = context;
+    thread_context->tid = tid;
+
+    ebsize = context->blocksize + context->typesize * (int32_t)sizeof(int32_t);
+    thread_context->tmp = my_malloc(context->blocksize + ebsize + context->blocksize);
+    thread_context->tmp2 = thread_context->tmp + context->blocksize;
+    thread_context->tmp3 = thread_context->tmp + context->blocksize + ebsize;
+    thread_context->tmpblocksize = context->blocksize;
+
+#if !defined(_WIN32)
+    rc2 = pthread_create(&context->threads[tid], &context->ct_attr, t_blosc, (void *)thread_context);
+#else
+    rc2 = pthread_create(&context->threads[tid], NULL, t_blosc, (void *)thread_context);
+#endif
+    if (rc2) {
+      fprintf(stderr, "ERROR; return code from pthread_create() is %d\n", rc2);
+      fprintf(stderr, "\tError detail: %s\n", strerror(rc2));
+      return(-1);
+    }
+  }
+
+  return(0);
+}
+
+int blosc_get_nthreads(void)
+{
+  int ret = g_threads;
+
+  return ret;
+}
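+
+/*
+   Editor's note (illustrative sketch, not part of the upstream sources):
+   a typical way to drive the thread pool managed by the functions around
+   here, using a hypothetical use_pool() helper:
+
+     #include "blosc.h"
+
+     static void use_pool(void) {
+       blosc_init();
+       int prev = blosc_set_nthreads(4);   // returns the previous setting
+       (void)prev;
+       // ... compress/decompress with 4 internal threads ...
+       blosc_free_resources();             // drop the pool when idle
+       blosc_destroy();
+     }
+*/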
+
+int blosc_set_nthreads(int nthreads_new)
+{
+  int ret = g_threads;
+
+  /* Check whether we should initialize */
+  if (!g_initlib) blosc_init();
+
+  if (nthreads_new != ret) {
+    /* Re-initialize Blosc */
+    blosc_destroy();
+    blosc_init();
+    g_threads = nthreads_new;
+  }
+
+  return ret;
+}
+
+int blosc_set_nthreads_(struct blosc_context* context)
+{
+  if (context->numthreads > BLOSC_MAX_THREADS) {
+    fprintf(stderr,
+            "Error.  nthreads cannot be larger than BLOSC_MAX_THREADS (%d)\n",
+            BLOSC_MAX_THREADS);
+    return -1;
+  }
+  else if (context->numthreads <= 0) {
+    fprintf(stderr, "Error.  nthreads must be a positive integer\n");
+    return -1;
+  }
+
+  /* Launch a new pool of threads */
+  if (context->numthreads > 1 && context->numthreads != context->threads_started) {
+    blosc_release_threadpool(context);
+    if (init_threads(context) < 0) {
+      return -1;
+    }
+  }
+
+  /* We have now started the threads */
+  context->threads_started = context->numthreads;
+
+  return context->numthreads;
+}
+
+const char* blosc_get_compressor(void)
+{
+  const char* compname;
+  blosc_compcode_to_compname(g_compressor, &compname);
+
+  return compname;
+}
+
+int blosc_set_compressor(const char *compname)
+{
+  int code = blosc_compname_to_compcode(compname);
+
+  g_compressor = code;
+
+  /* Check whether we should initialize */
+  if (!g_initlib) blosc_init();
+
+  return code;
+}
+
+const char* blosc_list_compressors(void)
+{
+  static int compressors_list_done = 0;
+  static char ret[256];
+
+  if (compressors_list_done) return ret;
+  ret[0] = '\0';
+  strcat(ret, BLOSC_BLOSCLZ_COMPNAME);
+#if defined(HAVE_LZ4)
+  strcat(ret, ","); strcat(ret, BLOSC_LZ4_COMPNAME);
+  strcat(ret, ","); strcat(ret, BLOSC_LZ4HC_COMPNAME);
+#endif /* HAVE_LZ4 */
+#if defined(HAVE_SNAPPY)
+  strcat(ret, ","); strcat(ret, BLOSC_SNAPPY_COMPNAME);
+#endif /* HAVE_SNAPPY */
+#if defined(HAVE_ZLIB)
+  strcat(ret, ","); strcat(ret, BLOSC_ZLIB_COMPNAME);
+#endif /* HAVE_ZLIB */
+#if defined(HAVE_ZSTD)
+  strcat(ret, ","); strcat(ret, BLOSC_ZSTD_COMPNAME);
+#endif /* HAVE_ZSTD */
+  compressors_list_done = 1;
+  return ret;
+}
+
+const char* blosc_get_version_string(void)
+{
+  return BLOSC_VERSION_STRING;
+}
+
+int blosc_get_complib_info(const char *compname, char **complib, char **version)
+{
+  int clibcode;
+  const char *clibname;
+  const char *clibversion = "unknown";
+
+#if (defined(HAVE_LZ4) && defined(LZ4_VERSION_MAJOR)) || (defined(HAVE_SNAPPY) && defined(SNAPPY_VERSION)) || defined(ZSTD_VERSION_MAJOR)
+  char sbuffer[256];
+#endif
+
+  clibcode = compname_to_clibcode(compname);
+  clibname = clibcode_to_clibname(clibcode);
+
+  /* complib version */
+  if (clibcode == BLOSC_BLOSCLZ_LIB) {
+    clibversion = BLOSCLZ_VERSION_STRING;
+  }
+#if defined(HAVE_LZ4)
+  else if (clibcode == BLOSC_LZ4_LIB) {
+#if defined(LZ4_VERSION_MAJOR)
+    sprintf(sbuffer, "%d.%d.%d",
+            LZ4_VERSION_MAJOR, LZ4_VERSION_MINOR, LZ4_VERSION_RELEASE);
+    clibversion = sbuffer;
+#endif /* LZ4_VERSION_MAJOR */
+  }
+#endif /* HAVE_LZ4 */
+#if defined(HAVE_SNAPPY)
+  else if (clibcode == BLOSC_SNAPPY_LIB) {
+#if defined(SNAPPY_VERSION)
+    sprintf(sbuffer, "%d.%d.%d", SNAPPY_MAJOR, SNAPPY_MINOR, SNAPPY_PATCHLEVEL);
+    clibversion = sbuffer;
+#endif /* SNAPPY_VERSION */
+  }
+#endif /* HAVE_SNAPPY */
+#if defined(HAVE_ZLIB)
+  else if (clibcode == BLOSC_ZLIB_LIB) {
+    clibversion = ZLIB_VERSION;
+  }
+#endif /* HAVE_ZLIB */
+#if defined(HAVE_ZSTD)
+  else if (clibcode == BLOSC_ZSTD_LIB) {
+    sprintf(sbuffer, "%d.%d.%d",
+            ZSTD_VERSION_MAJOR, ZSTD_VERSION_MINOR, ZSTD_VERSION_RELEASE);
+    clibversion = sbuffer;
+  }
+#endif /* HAVE_ZSTD */
+  else {
+    /* Unsupported library */
+    if (complib != NULL) *complib = NULL;
+    if (version != NULL) *version = NULL;
+    return -1;
+  }
+
+  if (complib != NULL) *complib = strdup(clibname);
+  if (version != NULL) *version = strdup(clibversion);
+
+  return clibcode;
+}
+
+/* Return `nbytes`, `cbytes` and `blocksize` from a compressed buffer.
*/ +void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes, + size_t *cbytes, size_t *blocksize) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + uint8_t version = _src[0]; /* version of header */ + + if (version != BLOSC_VERSION_FORMAT) { + *nbytes = *blocksize = *cbytes = 0; + return; + } + + /* Read the interesting values */ + *nbytes = (size_t)sw32_(_src + 4); /* uncompressed buffer size */ + *blocksize = (size_t)sw32_(_src + 8); /* block size */ + *cbytes = (size_t)sw32_(_src + 12); /* compressed buffer size */ +} + +int blosc_cbuffer_validate(const void* cbuffer, size_t cbytes, size_t* nbytes) { + size_t header_cbytes, header_blocksize; + if (cbytes < BLOSC_MIN_HEADER_LENGTH) return -1; + blosc_cbuffer_sizes(cbuffer, nbytes, &header_cbytes, &header_blocksize); + if (header_cbytes != cbytes) return -1; + if (*nbytes > BLOSC_MAX_BUFFERSIZE) return -1; + return 0; +} + +/* Return `typesize` and `flags` from a compressed buffer. */ +void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize, + int *flags) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + + uint8_t version = _src[0]; /* version of header */ + + if (version != BLOSC_VERSION_FORMAT) { + *flags = *typesize = 0; + return; + } + + /* Read the interesting values */ + *flags = (int)_src[2] & 7; /* first three flags */ + *typesize = (size_t)_src[3]; /* typesize */ +} + + +/* Return version information from a compressed buffer. */ +void blosc_cbuffer_versions(const void *cbuffer, int *version, + int *versionlz) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + + /* Read the version info */ + *version = (int)_src[0]; /* blosc format version */ + *versionlz = (int)_src[1]; /* Lempel-Ziv compressor format version */ +} + + +/* Return the compressor library/format used in a compressed buffer. */ +const char *blosc_cbuffer_complib(const void *cbuffer) +{ + uint8_t *_src = (uint8_t *)(cbuffer); /* current pos for source buffer */ + int clibcode; + const char *complib; + + /* Read the compressor format/library info */ + clibcode = (_src[2] & 0xe0) >> 5; + complib = clibcode_to_clibname(clibcode); + return complib; +} + +/* Get the internal blocksize to be used during compression. 0 means + that an automatic blocksize is computed internally. */ +int blosc_get_blocksize(void) +{ + return (int)g_force_blocksize; +} + +/* Force the use of a specific blocksize. If 0, an automatic + blocksize will be used (the default). */ +void blosc_set_blocksize(size_t size) +{ + g_force_blocksize = (int32_t)size; +} + +/* Force the use of a specific split mode. */ +void blosc_set_splitmode(int mode) +{ + g_splitmode = mode; +} + +/* Child global context is invalid and pool threads no longer exist post-fork. + * Discard the old, inconsistent global context and global context mutex and + * mark as uninitialized. Subsequent calls through `blosc_*` interfaces will + * trigger re-init of the global context. 
+ *
+ * All pthread interfaces have undefined behavior in a child handler under
+ * the current POSIX standards: https://pubs.opengroup.org/onlinepubs/9699919799/
+ */
+void blosc_atfork_child(void) {
+  if (!g_initlib) return;
+
+  g_initlib = 0;
+
+  my_free(global_comp_mutex);
+  global_comp_mutex = NULL;
+
+  my_free(g_global_context);
+  g_global_context = NULL;
+}
+
+void blosc_init(void)
+{
+  /* Return if we are already initialized */
+  if (g_initlib) return;
+
+  global_comp_mutex = (pthread_mutex_t*)my_malloc(sizeof(pthread_mutex_t));
+  pthread_mutex_init(global_comp_mutex, NULL);
+
+  g_global_context = (struct blosc_context*)my_malloc(sizeof(struct blosc_context));
+  g_global_context->threads_started = 0;
+
+  #if !defined(_WIN32)
+  /* atfork handlers are only registered once, though multiple re-inits may
+   * occur via blosc_destroy/blosc_init. */
+  if (!g_atfork_registered) {
+    g_atfork_registered = 1;
+    pthread_atfork(NULL, NULL, &blosc_atfork_child);
+  }
+  #endif
+
+  g_initlib = 1;
+}
+
+void blosc_destroy(void)
+{
+  /* Return if Blosc is not initialized */
+  if (!g_initlib) return;
+
+  g_initlib = 0;
+
+  blosc_release_threadpool(g_global_context);
+  my_free(g_global_context);
+  g_global_context = NULL;
+
+  pthread_mutex_destroy(global_comp_mutex);
+  my_free(global_comp_mutex);
+  global_comp_mutex = NULL;
+}
+
+int blosc_release_threadpool(struct blosc_context* context)
+{
+  int32_t t;
+  void* status;
+  int rc;
+  int rc2;
+  (void)rc;  // just to avoid 'unused-variable' warning
+
+  if (context->threads_started > 0)
+  {
+    /* Tell all existing threads to finish */
+    context->end_threads = 1;
+
+    /* Sync threads */
+    WAIT_INIT(-1, context);
+
+    /* Join exiting threads */
+    for (t = 0; t < context->threads_started; t++) {
+      rc2 = pthread_join(context->threads[t], &status);
+      if (rc2) {
+        fprintf(stderr, "ERROR; return code from pthread_join() is %d\n", rc2);
+        fprintf(stderr, "\tError detail: %s\n", strerror(rc2));
+      }
+    }
+
+    /* Release mutex and condition variable objects */
+    pthread_mutex_destroy(&context->count_mutex);
+
+    /* Barriers */
+  #ifdef _POSIX_BARRIERS_MINE
+    pthread_barrier_destroy(&context->barr_init);
+    pthread_barrier_destroy(&context->barr_finish);
+  #else
+    pthread_mutex_destroy(&context->count_threads_mutex);
+    pthread_cond_destroy(&context->count_threads_cv);
+  #endif
+
+    /* Thread attributes */
+  #if !defined(_WIN32)
+    pthread_attr_destroy(&context->ct_attr);
+  #endif
+  }
+
+  context->threads_started = 0;
+
+  return 0;
+}
+
+int blosc_free_resources(void)
+{
+  /* Return if Blosc is not initialized */
+  if (!g_initlib) return -1;
+
+  return blosc_release_threadpool(g_global_context);
+}
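+
+/*
+   Editor's note (illustrative sketch, not part of the upstream sources):
+   thanks to the atfork handler registered in blosc_init(), a child process
+   can keep using the library; the first blosc_* call after the fork
+   re-initializes the global context transparently:
+
+     #include <unistd.h>
+     #include "blosc.h"
+
+     static void fork_and_compress(void) {
+       blosc_init();
+       if (fork() == 0) {
+         // child: the stale pool/mutex were discarded by blosc_atfork_child();
+         // blosc_compress()/blosc_decompress() will call blosc_init() again
+         _exit(0);
+       }
+     }
+*/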
diff --git a/c-blosc/blosc/blosc.h b/c-blosc/blosc/blosc.h
new file mode 100644
index 0000000..c9dc8ed
--- /dev/null
+++ b/c-blosc/blosc/blosc.h
@@ -0,0 +1,535 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+#ifndef BLOSC_H
+#define BLOSC_H
+
+#include <limits.h>
+#include <stdlib.h>
+#include "blosc-export.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Version numbers */
+#define BLOSC_VERSION_MAJOR    1    /* for major interface/format changes */
+#define BLOSC_VERSION_MINOR    21   /* for minor interface/format changes */
+#define BLOSC_VERSION_RELEASE  6    /* for tweaks, bug-fixes, or development */
+
+#define BLOSC_VERSION_STRING   "1.21.6"  /* string version.  Sync with above! */
+#define BLOSC_VERSION_REVISION "$Rev$"   /* revision version */
+#define BLOSC_VERSION_DATE     "$Date:: 2024-06-24 #$"    /* date version */
+
+/* The *_FORMAT symbols should be just 1-byte long */
+#define BLOSC_VERSION_FORMAT    2   /* Blosc format version, starting at 1 */
+
+/* Minimum header length */
+#define BLOSC_MIN_HEADER_LENGTH 16
+
+/* The maximum overhead during compression in bytes.  This is equal to
+   BLOSC_MIN_HEADER_LENGTH now, but can be higher in future
+   implementations */
+#define BLOSC_MAX_OVERHEAD BLOSC_MIN_HEADER_LENGTH
+
+/* Maximum source buffer size to be compressed */
+#define BLOSC_MAX_BUFFERSIZE (INT_MAX - BLOSC_MAX_OVERHEAD)
+
+/* Maximum typesize before considering source buffer as a stream of bytes */
+#define BLOSC_MAX_TYPESIZE 255         /* Cannot be larger than 255 */
+
+/* Maximum supported blocksize.  Decompression (getitem) requires a temporary
+   buffer of size 3*blocksize + sizeof(int32_t) * typesize. */
+#define BLOSC_MAX_BLOCKSIZE \
+  ((INT_MAX - BLOSC_MAX_TYPESIZE * sizeof(int32_t)) / 3)
+
+/* The maximum number of threads (for some static arrays) */
+#define BLOSC_MAX_THREADS 256
+
+/* Codes for shuffling (see blosc_compress) */
+#define BLOSC_NOSHUFFLE   0  /* no shuffle */
+#define BLOSC_SHUFFLE     1  /* byte-wise shuffle */
+#define BLOSC_BITSHUFFLE  2  /* bit-wise shuffle */
+
+/* Codes for internal flags (see blosc_cbuffer_metainfo) */
+#define BLOSC_DOSHUFFLE    0x1  /* byte-wise shuffle */
+#define BLOSC_MEMCPYED     0x2  /* plain copy */
+#define BLOSC_DOBITSHUFFLE 0x4  /* bit-wise shuffle */
+
+/* Codes for the different compressors shipped with Blosc */
+#define BLOSC_BLOSCLZ   0
+#define BLOSC_LZ4       1
+#define BLOSC_LZ4HC     2
+#define BLOSC_SNAPPY    3
+#define BLOSC_ZLIB      4
+#define BLOSC_ZSTD      5
+
+/* Names for the different compressors shipped with Blosc */
+#define BLOSC_BLOSCLZ_COMPNAME   "blosclz"
+#define BLOSC_LZ4_COMPNAME       "lz4"
+#define BLOSC_LZ4HC_COMPNAME     "lz4hc"
+#define BLOSC_SNAPPY_COMPNAME    "snappy"
+#define BLOSC_ZLIB_COMPNAME      "zlib"
+#define BLOSC_ZSTD_COMPNAME      "zstd"
+
+/* Codes for compression libraries shipped with Blosc (code must be < 8) */
+#define BLOSC_BLOSCLZ_LIB   0
+#define BLOSC_LZ4_LIB       1
+#define BLOSC_SNAPPY_LIB    2
+#define BLOSC_ZLIB_LIB      3
+#define BLOSC_ZSTD_LIB      4
+
+/* Names for the different compression libraries shipped with Blosc */
+#define BLOSC_BLOSCLZ_LIBNAME   "BloscLZ"
+#define BLOSC_LZ4_LIBNAME       "LZ4"
+#define BLOSC_SNAPPY_LIBNAME    "Snappy"
+#define BLOSC_ZLIB_LIBNAME      "Zlib"
+#define BLOSC_ZSTD_LIBNAME      "Zstd"
+
+/* The codes for compressor formats shipped with Blosc */
+#define BLOSC_BLOSCLZ_FORMAT  BLOSC_BLOSCLZ_LIB
+#define BLOSC_LZ4_FORMAT      BLOSC_LZ4_LIB
+#define BLOSC_LZ4HC_FORMAT    BLOSC_LZ4_LIB    /* LZ4HC and LZ4 share the same format */
+#define BLOSC_SNAPPY_FORMAT   BLOSC_SNAPPY_LIB
+#define BLOSC_ZLIB_FORMAT     BLOSC_ZLIB_LIB
+#define BLOSC_ZSTD_FORMAT     BLOSC_ZSTD_LIB
+
+
+/* The version formats for compressors shipped with Blosc */
+/* All versions here start at 1 */
+#define BLOSC_BLOSCLZ_VERSION_FORMAT  1
+#define BLOSC_LZ4_VERSION_FORMAT      1
+#define BLOSC_LZ4HC_VERSION_FORMAT    1    /* LZ4HC and LZ4 share the same format */
+#define BLOSC_SNAPPY_VERSION_FORMAT   1
+#define BLOSC_ZLIB_VERSION_FORMAT     1
+#define BLOSC_ZSTD_VERSION_FORMAT     1
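+
+/*
+   Editor's note (illustrative sketch, not part of the upstream sources):
+   how the constants above map onto the first bytes of a compressed buffer
+   (see also blosc_cbuffer_metainfo() and blosc_cbuffer_complib() below);
+   both helper names are hypothetical:
+
+     #include <stdint.h>
+
+     static int is_byte_shuffled(const uint8_t *cbuffer) {
+       uint8_t flags = cbuffer[2];          // header byte 2 holds the flags
+       return (flags & BLOSC_DOSHUFFLE) != 0;
+     }
+
+     static int compressor_format(const uint8_t *cbuffer) {
+       return (cbuffer[2] & 0xe0) >> 5;     // e.g. BLOSC_ZSTD_FORMAT
+     }
+*/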
+
+/* Split mode for blocks.  NEVER and ALWAYS are for experimenting with the
+ * best compression ratio, AUTO is for optimal behaviour (based on
+ * experiments), and FORWARD_COMPAT provides the best forward compatibility */
+#define BLOSC_ALWAYS_SPLIT 1
+#define BLOSC_NEVER_SPLIT 2
+#define BLOSC_AUTO_SPLIT 3
+#define BLOSC_FORWARD_COMPAT_SPLIT 4
+
+/**
+  Initialize the Blosc library environment.
+
+  You must call this prior to any other Blosc call, unless you want
+  Blosc to be used simultaneously in a multi-threaded environment, in
+  which case you should *exclusively* use the
+  blosc_compress_ctx()/blosc_decompress_ctx() pair (see below).
+  */
+BLOSC_EXPORT void blosc_init(void);
+
+
+/**
+  Destroy the Blosc library environment.
+
+  You must call this after you are done with all the Blosc calls,
+  unless you have not used blosc_init() before (see blosc_init()
+  above).
+  */
+BLOSC_EXPORT void blosc_destroy(void);
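+
+/*
+   Editor's note (illustrative sketch, not part of the upstream sources):
+   the usual lifecycle in a single-threaded application around the two
+   calls declared above:
+
+     #include "blosc.h"
+
+     int main(void) {
+       blosc_init();
+       // ... any number of blosc_compress()/blosc_decompress() calls ...
+       blosc_destroy();
+       return 0;
+     }
+*/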
+
+
+/**
+  Compress a block of data in the `src` buffer and return the size of
+  the compressed block.  The size of the `src` buffer is specified by
+  `nbytes`.  There is no minimum `src` buffer size (`nbytes`).
+
+  `clevel` is the desired compression level and must be a number
+  between 0 (no compression) and 9 (maximum compression).
+
+  `doshuffle` specifies whether the shuffle compression filters
+  should be applied or not.  BLOSC_NOSHUFFLE means not applying it,
+  BLOSC_SHUFFLE means applying it at a byte level and BLOSC_BITSHUFFLE
+  at a bit level (slower but may achieve better entropy alignment).
+
+  `typesize` is the number of bytes for the atomic type in the binary
+  `src` buffer.  This is mainly useful for the shuffle filters.
+  For implementation reasons, only 1 < `typesize` < 256 will allow the
+  shuffle filter to work.  When `typesize` is not in this range, shuffle
+  will be silently disabled.
+
+  The `dest` buffer must have at least the size of `destsize`.  Blosc
+  guarantees that if you set `destsize` to, at least,
+  (`nbytes` + BLOSC_MAX_OVERHEAD), the compression will always succeed.
+  The `src` buffer and the `dest` buffer cannot overlap.
+
+  Compression is memory safe and guaranteed not to write the `dest`
+  buffer beyond what is specified in `destsize`.
+
+  If the `src` buffer cannot be compressed into `destsize`, the return
+  value is zero and you should discard the contents of the `dest`
+  buffer.
+
+  A negative return value means that an internal error happened.  This
+  should never happen.  If you see this, please report it back
+  together with the buffer data causing this and the compression settings.
+
+  Environment variables
+  ---------------------
+
+  blosc_compress() honors different environment variables to control
+  internal parameters without the need of doing that programmatically.
+  Here are the ones supported:
+
+  BLOSC_CLEVEL=(INTEGER): This will overwrite the `clevel` parameter
+  before the compression process starts.
+
+  BLOSC_SHUFFLE=[NOSHUFFLE | SHUFFLE | BITSHUFFLE]: This will
+  overwrite the `doshuffle` parameter before the compression process
+  starts.
+
+  BLOSC_TYPESIZE=(INTEGER): This will overwrite the `typesize`
+  parameter before the compression process starts.
+
+  BLOSC_COMPRESSOR=[BLOSCLZ | LZ4 | LZ4HC | SNAPPY | ZLIB | ZSTD]: This will
+  call blosc_set_compressor(BLOSC_COMPRESSOR) before the compression
+  process starts.
+
+  BLOSC_NTHREADS=(INTEGER): This will call
+  blosc_set_nthreads(BLOSC_NTHREADS) before the compression process
+  starts.
+
+  BLOSC_BLOCKSIZE=(INTEGER): This will call
+  blosc_set_blocksize(BLOSC_BLOCKSIZE) before the compression process
+  starts.  *NOTE:* The blocksize is a critical parameter with
+  important restrictions in the allowed values, so use this with care.
+
+  BLOSC_NOLOCK=(ANY VALUE): This will call blosc_compress_ctx() under
+  the hood, with the `compressor`, `blocksize` and
+  `numinternalthreads` parameters set to the same as the last calls to
+  blosc_set_compressor(), blosc_set_blocksize() and
+  blosc_set_nthreads().  The BLOSC_CLEVEL, BLOSC_SHUFFLE and BLOSC_TYPESIZE
+  environment vars will also be honored.
+
+  BLOSC_SPLITMODE=[ FORWARD_COMPAT | AUTO | ALWAYS | NEVER ]:
+  This will call blosc_set_splitmode() with the different supported values.
+  See the blosc_set_splitmode() docstrings for more info on each mode.
+
+  BLOSC_WARN=(INTEGER): This will print some warning messages on stderr
+  showing more info in situations where data inputs cannot be compressed.
+  The values can range from 1 (less verbose) to 10 (fully verbose).  0 is
+  the same as if the BLOSC_WARN envvar was not defined.
+  */
+BLOSC_EXPORT int blosc_compress(int clevel, int doshuffle, size_t typesize,
+                                size_t nbytes, const void *src, void *dest,
+                                size_t destsize);
+
+
+/**
+  Context interface to blosc compression.  This does not require a call
+  to blosc_init() and can be called from multithreaded applications
+  without the global lock being used, thus allowing Blosc to be executed
+  simultaneously in those scenarios.
+
+  It uses the same parameters as the blosc_compress() function plus:
+
+  `compressor`: the string representing the type of compressor to use.
+
+  `blocksize`: the requested size of the compressed blocks.  If 0, an
+  automatic blocksize will be used.
+
+  `numinternalthreads`: the number of threads to use internally.
+
+  A negative return value means that an internal error happened.  This
+  should never happen.  If you see this, please report it back
+  together with the buffer data causing this and the compression settings.
+*/
+BLOSC_EXPORT int blosc_compress_ctx(int clevel, int doshuffle, size_t typesize,
+                                    size_t nbytes, const void* src, void* dest,
+                                    size_t destsize, const char* compressor,
+                                    size_t blocksize, int numinternalthreads);
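+
+/*
+   Editor's note (illustrative sketch, not part of the upstream sources):
+   the sizing rule from the blosc_compress() docstring in practice;
+   reserving nbytes + BLOSC_MAX_OVERHEAD guarantees compression never
+   fails for lack of room.  The pack() helper is hypothetical:
+
+     #include <stdlib.h>
+     #include "blosc.h"
+
+     static void *pack(const void *src, size_t nbytes, size_t typesize,
+                       int *csize) {
+       void *dest = malloc(nbytes + BLOSC_MAX_OVERHEAD);
+       if (dest == NULL) return NULL;
+       *csize = blosc_compress(5, BLOSC_SHUFFLE, typesize, nbytes,
+                               src, dest, nbytes + BLOSC_MAX_OVERHEAD);
+       return dest;   // *csize > 0 is guaranteed with this destsize
+     }
+*/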
+
+/**
+  Decompress a block of compressed data in `src`, put the result in
+  `dest` and return the size of the decompressed block.
+
+  Call `blosc_cbuffer_validate` to determine the size of the destination buffer.
+
+  The `src` buffer and the `dest` buffer cannot overlap.
+
+  Decompression is memory safe and guaranteed not to write the `dest`
+  buffer beyond what is specified in `destsize`.
+
+  If an error occurs, e.g. the compressed data is corrupted or the
+  output buffer is not large enough, then 0 (zero) or a negative value
+  will be returned instead.
+
+  Environment variables
+  ---------------------
+
+  blosc_decompress() honors different environment variables to control
+  internal parameters without the need of doing that programmatically.
+  Here are the ones supported:
+
+  BLOSC_NTHREADS=(INTEGER): This will call
+  blosc_set_nthreads(BLOSC_NTHREADS) before the proper decompression
+  process starts.
+
+  BLOSC_NOLOCK=(ANY VALUE): This will call blosc_decompress_ctx()
+  under the hood, with the `numinternalthreads` parameter set to the
+  same value as the last call to blosc_set_nthreads().
+*/
+BLOSC_EXPORT int blosc_decompress(const void *src, void *dest, size_t destsize);
+
+/**
+  Context interface to blosc decompression.  This does not require a
+  call to blosc_init() and can be called from multithreaded
+  applications without the global lock being used, thus allowing Blosc
+  to be executed simultaneously in those scenarios.
+
+  Call `blosc_cbuffer_validate` to determine the size of the destination buffer.
+
+  It uses the same parameters as the blosc_decompress() function plus:
+
+  `numinternalthreads`: number of threads to use internally.
+
+  Decompression is memory safe and guaranteed not to write the `dest`
+  buffer beyond what is specified in `destsize`.
+
+  If an error occurs, e.g. the compressed data is corrupted or the
+  output buffer is not large enough, then 0 (zero) or a negative value
+  will be returned instead.
+*/
+BLOSC_EXPORT int blosc_decompress_ctx(const void *src, void *dest,
+                                      size_t destsize, int numinternalthreads);
+
+/**
+  Get `nitems` (each of size `typesize`) from the `src` buffer, starting at
+  `start`.  The items are returned in the `dest` buffer, which must have
+  enough space for storing all items.
+
+  Returns the number of bytes copied to `dest` or a negative value if
+  some error happens.
+  */
+BLOSC_EXPORT int blosc_getitem(const void *src, int start, int nitems, void *dest);
+
+/**
+  Returns the current number of threads that are used for
+  compression/decompression.
+  */
+BLOSC_EXPORT int blosc_get_nthreads(void);
+
+
+/**
+  Initialize a pool of threads for compression/decompression.  If
+  `nthreads` is 1, then the serial version is chosen and a possible
+  previous existing pool is ended.  If this is not called, `nthreads`
+  is set to 1 internally.
+
+  Returns the previous number of threads.
+  */
+BLOSC_EXPORT int blosc_set_nthreads(int nthreads);
+
+
+/**
+  Returns the current compressor that is being used for compression.
+  */
+BLOSC_EXPORT const char* blosc_get_compressor(void);
+
+
+/**
+  Select the compressor to be used.  The supported ones are "blosclz",
+  "lz4", "lz4hc", "snappy", "zlib" and "zstd".  If this function is not
+  called, then "blosclz" will be used by default.
+
+  In case the compressor is not recognized, or there is no support
+  for it in this build, it returns a -1.  Else it returns the code for
+  the compressor (>=0).
+  */
+BLOSC_EXPORT int blosc_set_compressor(const char* compname);
+
+
+/**
+  Get the `compname` associated with the `compcode`.
+
+  If the compressor code is not recognized, or there is no support
+  for it in this build, -1 is returned.  Else, the compressor code is
+  returned.
+  */
+BLOSC_EXPORT int blosc_compcode_to_compname(int compcode, const char **compname);
+
+
+/**
+  Return the compressor code associated with the compressor name.
+
+  If the compressor name is not recognized, or there is no support
+  for it in this build, -1 is returned instead.
+  */
+BLOSC_EXPORT int blosc_compname_to_compcode(const char *compname);
+
+
+/**
+  Get a list of compressors supported in the current build.  The
+  returned value is a string with a concatenation of "blosclz", "lz4",
+  "lz4hc", "snappy", "zlib" or "zstd", separated by commas, depending
+  on which ones are present in the build.
+
+  This function does not leak, so you should not free() the returned
+  list.
+
+  This function should always succeed.
+  */
+BLOSC_EXPORT const char* blosc_list_compressors(void);
+
+/**
+  Return the version of the C-Blosc library in string format.
+
+  Useful for dynamic libraries.
+*/
+BLOSC_EXPORT const char* blosc_get_version_string(void);
+
+
+/**
+  Get info from compression libraries included in the current build.
+ In `compname` you pass the compressor name that you want info from. + + In `complib` and `version` you get a pointer to the compressor + library name and the version in string format respectively. After + using the name and version, you should free() them so as to avoid + leaks. If any of `complib` and `version` are NULL, they will not be + assigned to anything, and the user should not need to free them. + + If the compressor is supported, it returns the code for the library + (>=0). If it is not supported, this function returns -1. + */ +BLOSC_EXPORT int blosc_get_complib_info(const char *compname, char **complib, char **version); + + +/** + Free possible memory temporaries and thread resources. Use this + when you are not going to use Blosc for a long while. In case of + problems releasing the resources, it returns a negative number, else + it returns 0. + */ +BLOSC_EXPORT int blosc_free_resources(void); + + +/** + Return information about a compressed buffer, namely the number of + uncompressed bytes (`nbytes`) and compressed (`cbytes`). It also + returns the `blocksize` (which is used internally for doing the + compression by blocks). + + You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a + compressed buffer for this call to work. + + If the format is not supported by the library, all output arguments will be + filled with zeros. + */ +BLOSC_EXPORT void blosc_cbuffer_sizes(const void *cbuffer, size_t *nbytes, + size_t *cbytes, size_t *blocksize); + +/** + Checks that the compressed buffer starting at `cbuffer` of length `cbytes` may + contain valid blosc compressed data, and that it is safe to call + blosc_decompress/blosc_decompress_ctx/blosc_getitem. + + On success, returns 0 and sets *nbytes to the size of the uncompressed data. + This does not guarantee that the decompression function won't return an error, + but does guarantee that it is safe to attempt decompression. + + On failure, returns -1. + */ +BLOSC_EXPORT int blosc_cbuffer_validate(const void* cbuffer, size_t cbytes, + size_t* nbytes); + +/** + Return meta-information about a compressed buffer, namely the type size + (`typesize`), as well as some internal `flags`. + + The `flags` is a set of bits, where the used ones are: + * bit 0: whether the shuffle filter has been applied or not + * bit 1: whether the internal buffer is a pure memcpy or not + * bit 2: whether the bit shuffle filter has been applied or not + + You can use the `BLOSC_DOSHUFFLE`, `BLOSC_DOBITSHUFFLE` and + `BLOSC_MEMCPYED` symbols for extracting the interesting bits + (e.g. ``flags & BLOSC_DOSHUFFLE`` says whether the buffer is + byte-shuffled or not). + + You only need to pass the first BLOSC_MIN_HEADER_LENGTH bytes of a + compressed buffer for this call to work. + + If the format is not supported by the library, all output arguments will be + filled with zeros. + */ +BLOSC_EXPORT void blosc_cbuffer_metainfo(const void *cbuffer, size_t *typesize, + int *flags); + + +/** + Return information about a compressed buffer, namely the internal + Blosc format version (`version`) and the format for the internal + compressor used (`compversion`). + + This function should always succeed. + */ +BLOSC_EXPORT void blosc_cbuffer_versions(const void *cbuffer, int *version, + int *compversion); + + +/** + Return the compressor library/format used in a compressed buffer. + + This function should always succeed. 
+ */
+BLOSC_EXPORT const char *blosc_cbuffer_complib(const void *cbuffer);
+
+
+
+/*********************************************************************
+
+  Low-level functions follow.  Use them only if you are an expert!
+
+*********************************************************************/
+
+/**
+  Get the internal blocksize to be used during compression.  0 means
+  that an automatic blocksize is computed internally (the default).
+  */
+BLOSC_EXPORT int blosc_get_blocksize(void);
+
+/**
+  Force the use of a specific blocksize.  If 0, an automatic
+  blocksize will be used (the default).
+
+  The blocksize is a critical parameter with important restrictions in
+  the allowed values, so use this with care.
+  */
+BLOSC_EXPORT void blosc_set_blocksize(size_t blocksize);
+
+/**
+  Set the split mode.
+
+  This function can take the following values:
+    * BLOSC_FORWARD_COMPAT_SPLIT
+    * BLOSC_AUTO_SPLIT
+    * BLOSC_NEVER_SPLIT
+    * BLOSC_ALWAYS_SPLIT
+
+  BLOSC_FORWARD_COMPAT offers reasonable forward compatibility,
+  BLOSC_AUTO_SPLIT is for nearly optimal results (based on heuristics),
+  and BLOSC_NEVER_SPLIT and BLOSC_ALWAYS_SPLIT are for experimenting
+  when trying to get the best compression ratios and/or speed.
+
+  If not called, the default mode is BLOSC_FORWARD_COMPAT_SPLIT.
+
+  This function should always succeed.
+  */
+BLOSC_EXPORT void blosc_set_splitmode(int splitmode);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/c-blosc/blosc/blosclz.c b/c-blosc/blosc/blosclz.c
new file mode 100644
index 0000000..f5ca10a
--- /dev/null
+++ b/c-blosc/blosc/blosclz.c
@@ -0,0 +1,789 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Copyright (c) 2021 The Blosc Developers
+  https://blosc.org
+  License: BSD 3-Clause (see LICENSE.txt)
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+
+/*********************************************************************
+  The code in this file is heavily based on FastLZ, a lightning-fast
+  lossless compression library.  See LICENSES/FASTLZ.txt for details.
+**********************************************************************/
+
+
+#include <stdio.h>
+#include <stdbool.h>
+#include "blosclz.h"
+#include "fastcopy.h"
+#include "blosc-common.h"
+
+
+/*
+ * Give hints to the compiler for branch prediction optimization.
+ */
+#if defined(__GNUC__) && (__GNUC__ > 2)
+#define BLOSCLZ_LIKELY(c) (__builtin_expect((c), 1))
+#define BLOSCLZ_UNLIKELY(c) (__builtin_expect((c), 0))
+#else
+#define BLOSCLZ_LIKELY(c) (c)
+#define BLOSCLZ_UNLIKELY(c) (c)
+#endif
+
+/*
+ * Use inlined functions for supported systems.
+ */ +#if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */ +#define inline __inline /* Visual C is not C99, but supports some kind of inline */ +#endif + +#define MAX_COPY 32U +#define MAX_DISTANCE 8191 +#define MAX_FARDISTANCE (65535 + MAX_DISTANCE - 1) + +#ifdef BLOSC_STRICT_ALIGN + #define BLOSCLZ_READU16(p) ((p)[0] | (p)[1]<<8) + #define BLOSCLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24) +#else + #define BLOSCLZ_READU16(p) *((const uint16_t*)(p)) + #define BLOSCLZ_READU32(p) *((const uint32_t*)(p)) +#endif + +#define HASH_LOG (14U) +#define HASH_LOG2 (12U) + +// This is used in LZ4 and seems to work pretty well here too +#define HASH_FUNCTION(v, s, h) { \ + (v) = ((s) * 2654435761U) >> (32U - (h)); \ +} + + +#if defined(__AVX2__) +static uint8_t *get_run_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { + uint8_t x = ip[-1]; + + while (ip < (ip_bound - (sizeof(__m256i)))) { + __m256i value, value2, cmp; + /* Broadcast the value for every byte in a 256-bit register */ + memset(&value, x, sizeof(__m256i)); + value2 = _mm256_loadu_si256((__m256i *)ref); + cmp = _mm256_cmpeq_epi64(value, value2); + if ((unsigned)_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) { + /* Return the byte that starts to differ */ + while (*ref++ == x) ip++; + return ip; + } + else { + ip += sizeof(__m256i); + ref += sizeof(__m256i); + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == x)) ip++; + return ip; +} +#endif + +#if defined(__SSE2__) +uint8_t *get_run_16(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { + uint8_t x = ip[-1]; + + while (ip < (ip_bound - sizeof(__m128i))) { + __m128i value, value2, cmp; + /* Broadcast the value for every byte in a 128-bit register */ + memset(&value, x, sizeof(__m128i)); + value2 = _mm_loadu_si128((__m128i *)ref); + cmp = _mm_cmpeq_epi32(value, value2); + if (_mm_movemask_epi8(cmp) != 0xFFFF) { + /* Return the byte that starts to differ */ + while (*ref++ == x) ip++; + return ip; + } + else { + ip += sizeof(__m128i); + ref += sizeof(__m128i); + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == x)) ip++; + return ip; +} + +#endif + + +static uint8_t *get_run(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { + uint8_t x = ip[-1]; + int64_t value, value2; + /* Broadcast the value for every byte in a 64-bit register */ + memset(&value, x, 8); + /* safe because the outer check against ip limit */ + while (ip < (ip_bound - sizeof(int64_t))) { +#if defined(BLOSC_STRICT_ALIGN) + memcpy(&value2, ref, 8); +#else + value2 = ((int64_t*)ref)[0]; +#endif + if (value != value2) { + /* Return the byte that starts to differ */ + while (*ref++ == x) ip++; + return ip; + } + else { + ip += 8; + ref += 8; + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == x)) ip++; + return ip; +} + + +/* Return the byte that starts to differ */ +uint8_t *get_match(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { +#if !defined(BLOSC_STRICT_ALIGN) + while (ip < (ip_bound - sizeof(int64_t))) { + if (*(int64_t*)ref != *(int64_t*)ip) { + /* Return the byte that starts to differ */ + while (*ref++ == *ip++) {} + return ip; + } + else { + ip += sizeof(int64_t); + ref += sizeof(int64_t); + } + } +#endif + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == *ip++)) {} + return ip; +} + + +#if defined(__SSE2__) +static uint8_t *get_match_16(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { + __m128i value, value2, cmp; + + while (ip < (ip_bound - 
sizeof(__m128i))) { + value = _mm_loadu_si128((__m128i *) ip); + value2 = _mm_loadu_si128((__m128i *) ref); + cmp = _mm_cmpeq_epi32(value, value2); + if (_mm_movemask_epi8(cmp) != 0xFFFF) { + /* Return the byte that starts to differ */ + while (*ref++ == *ip++) {} + return ip; + } + else { + ip += sizeof(__m128i); + ref += sizeof(__m128i); + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == *ip++)) {} + return ip; +} +#endif + + +#if defined(__AVX2__) +static uint8_t *get_match_32(uint8_t *ip, const uint8_t *ip_bound, const uint8_t *ref) { + + while (ip < (ip_bound - sizeof(__m256i))) { + __m256i value, value2, cmp; + value = _mm256_loadu_si256((__m256i *) ip); + value2 = _mm256_loadu_si256((__m256i *)ref); + cmp = _mm256_cmpeq_epi64(value, value2); + if ((unsigned)_mm256_movemask_epi8(cmp) != 0xFFFFFFFF) { + /* Return the byte that starts to differ */ + while (*ref++ == *ip++) {} + return ip; + } + else { + ip += sizeof(__m256i); + ref += sizeof(__m256i); + } + } + /* Look into the remainder */ + while ((ip < ip_bound) && (*ref++ == *ip++)) {} + return ip; +} +#endif + + +static uint8_t* get_run_or_match(uint8_t* ip, uint8_t* ip_bound, const uint8_t* ref, bool run) { + if (BLOSCLZ_UNLIKELY(run)) { +#if defined(__AVX2__) + // Extensive experiments on AMD Ryzen3 say that regular get_run is faster + // ip = get_run_32(ip, ip_bound, ref); + ip = get_run(ip, ip_bound, ref); +#elif defined(__SSE2__) + // Extensive experiments on AMD Ryzen3 say that regular get_run is faster + // ip = get_run_16(ip, ip_bound, ref); + ip = get_run(ip, ip_bound, ref); +#else + ip = get_run(ip, ip_bound, ref); +#endif + } + else { +#if defined(__AVX2__) + // Extensive experiments on AMD Ryzen3 say that regular get_match_16 is faster + // ip = get_match_32(ip, ip_bound, ref); + ip = get_match_16(ip, ip_bound, ref); +#elif defined(__SSE2__) + ip = get_match_16(ip, ip_bound, ref); +#else + ip = get_match(ip, ip_bound, ref); +#endif + } + + return ip; +} + + +#define LITERAL(ip, op, op_limit, anchor, copy) { \ + if (BLOSCLZ_UNLIKELY((op) + 2 > (op_limit))) \ + goto out; \ + *(op)++ = *(anchor)++; \ + (ip) = (anchor); \ + (copy)++; \ + if (BLOSCLZ_UNLIKELY((copy) == MAX_COPY)) { \ + (copy) = 0; \ + *(op)++ = MAX_COPY-1; \ + } \ +} + +#define LITERAL2(ip, anchor, copy) { \ + oc++; (anchor)++; \ + (ip) = (anchor); \ + (copy)++; \ + if (BLOSCLZ_UNLIKELY((copy) == MAX_COPY)) { \ + (copy) = 0; \ + oc++; \ + } \ +} + +#define MATCH_SHORT(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY((op) + 2 > (op_limit))) \ + goto out; \ + *(op)++ = (uint8_t)(((len) << 5U) + ((distance) >> 8U));\ + *(op)++ = (uint8_t)(((distance) & 255U)); \ +} + +#define MATCH_LONG(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY((op) + 1 > (op_limit))) \ + goto out; \ + *(op)++ = (uint8_t)((7U << 5U) + ((distance) >> 8U)); \ + for ((len) -= 7; (len) >= 255; (len) -= 255) { \ + if (BLOSCLZ_UNLIKELY((op) + 1 > (op_limit))) \ + goto out; \ + *(op)++ = 255; \ + } \ + if (BLOSCLZ_UNLIKELY((op) + 2 > (op_limit))) \ + goto out; \ + *(op)++ = (uint8_t)(len); \ + *(op)++ = (uint8_t)(((distance) & 255U)); \ +} + +#define MATCH_SHORT_FAR(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY((op) + 4 > (op_limit))) \ + goto out; \ + *(op)++ = (uint8_t)(((len) << 5U) + 31); \ + *(op)++ = 255; \ + *(op)++ = (uint8_t)((distance) >> 8U); \ + *(op)++ = (uint8_t)((distance) & 255U); \ +} + +#define MATCH_LONG_FAR(op, op_limit, len, distance) { \ + if (BLOSCLZ_UNLIKELY((op) + 1 > (op_limit))) \ + goto out; \ + *(op)++ = (7U << 
5U) + 31; \
+  for ((len) -= 7; (len) >= 255; (len) -= 255) { \
+    if (BLOSCLZ_UNLIKELY((op) + 1 > (op_limit))) \
+      goto out; \
+    *(op)++ = 255; \
+  } \
+  if (BLOSCLZ_UNLIKELY((op) + 4 > (op_limit))) \
+    goto out; \
+  *(op)++ = (uint8_t)(len); \
+  *(op)++ = 255; \
+  *(op)++ = (uint8_t)((distance) >> 8U); \
+  *(op)++ = (uint8_t)((distance) & 255U); \
+}
+
+
+// Get a guess for the compressed size of a buffer
+static double get_cratio(uint8_t* ibase, int maxlen, int minlen, int ipshift) {
+  uint8_t* ip = ibase;
+  int32_t oc = 0;
+  const uint16_t hashlen = (1U << (uint8_t)HASH_LOG2);
+  uint16_t htab[1U << (uint8_t)HASH_LOG2];
+  uint32_t hval;
+  uint32_t seq;
+  uint8_t copy;
+  // Make a tradeoff between testing too much and too little
+  uint16_t limit = (maxlen > hashlen) ? hashlen : maxlen;
+  uint8_t* ip_bound = ibase + limit - 1;
+  uint8_t* ip_limit = ibase + limit - 12;
+
+  // Initialize the hash table to distances of 0
+  memset(htab, 0, hashlen * sizeof(uint16_t));
+
+  /* we start with literal copy */
+  copy = 4;
+  oc += 5;
+
+  /* main loop */
+  while (BLOSCLZ_LIKELY(ip < ip_limit)) {
+    const uint8_t* ref;
+    unsigned distance;
+    uint8_t* anchor = ip;  /* comparison starting-point */
+
+    /* find potential match */
+    seq = BLOSCLZ_READU32(ip);
+    HASH_FUNCTION(hval, seq, HASH_LOG2)
+    ref = ibase + htab[hval];
+
+    /* calculate distance to the match */
+    distance = (unsigned int)(anchor - ref);
+
+    /* update hash table */
+    htab[hval] = (uint16_t) (anchor - ibase);
+
+    if (distance == 0 || (distance >= MAX_FARDISTANCE)) {
+      LITERAL2(ip, anchor, copy)
+      continue;
+    }
+
+    /* is this a match? check the first 4 bytes */
+    if (BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip)) {
+      ref += 4;
+    }
+    else {
+      /* no luck, copy as a literal */
+      LITERAL2(ip, anchor, copy)
+      continue;
+    }
+
+    /* last matched byte */
+    ip = anchor + 4;
+
+    /* distance is biased */
+    distance--;
+
+    /* get runs or matches; zero distance means a run */
+    ip = get_run_or_match(ip, ip_bound, ref, !distance);
+
+    ip -= ipshift;
+    int len = (int)(ip - anchor);
+    if (len < minlen) {
+      LITERAL2(ip, anchor, copy)
+      continue;
+    }
+
+    /* if we haven't copied anything, adjust the output counter */
+    if (!copy)
+      oc--;
+    /* reset literal counter */
+    copy = 0;
+
+    /* encode the match */
+    if (distance < MAX_DISTANCE) {
+      if (len >= 7) {
+        oc += ((len - 7) / 255) + 1;
+      }
+      oc += 2;
+    }
+    else {
+      /* far away, but not yet in another galaxy... */
+      if (len >= 7) {
+        oc += ((len - 7) / 255) + 1;
+      }
+      oc += 4;
+    }
+
+    /* update the hash at match boundary */
+    seq = BLOSCLZ_READU32(ip);
+    HASH_FUNCTION(hval, seq, HASH_LOG2)
+    htab[hval] = (uint16_t)(ip++ - ibase);
+    ip++;
+    /* assuming literal copy */
+    oc++;
+  }
+
+  double ic = (double)(ip - ibase);
+  return ic / (double)oc;
+}
+
+
+int blosclz_compress(const int clevel, const void* input, int length,
+                     void* output, int maxout, const int split_block) {
+  uint8_t* ibase = (uint8_t*)input;
+
+  // Experiments say that checking 1/4 of the buffer is enough to estimate the approximate cratio
+  int maxlen = length / 4;
+  // Start probing somewhere inside the buffer
+  int shift = length - maxlen;
+  // Actual entropy probing!
+  double cratio = get_cratio(ibase + shift, maxlen, 3, 3);
+  // discard probes with small compression ratios (too expensive)
+  double cratio_[10] = {0, 2, 1.5, 1.2, 1.2, 1.2, 1.2, 1.15, 1.1, 1.0};
+  if (cratio < cratio_[clevel]) {
+    goto out;
+  }
+
+  /* When we go back in a match (shift), we obtain quite different compression properties.
+   * It looks like 4 is more useful in combination with bitshuffle and small typesizes.
+   * Fall back to 4 because it provides more consistent results for large cratios.
+   *
+   * In this block we also check cratios for the beginning of the buffers and
+   * eventually discard those that are small (take too long to decompress).
+   * This process is called _entropy probing_.
+   */
+  unsigned ipshift = 4;
+  // Compute optimal shift and minimum lengths for encoding
+  // Use 4 by default, except for low entropy data, where we should do a best effort
+  unsigned minlen = 4;
+  // BloscLZ mostly works better with split blocks, so when data is not split, do a best effort
+  // The cratio < 4 threshold is based on experiments with low- and high-entropy data
+  if (!split_block || cratio < 4) {
+    ipshift = 3;
+    minlen = 3;
+  }
+  else {
+    minlen = 4;
+  }
+
+  uint8_t hashlog_[10] = {0, HASH_LOG - 2, HASH_LOG - 1, HASH_LOG, HASH_LOG,
+                          HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG, HASH_LOG};
+  uint8_t hashlog = hashlog_[clevel];
+
+  uint8_t* ip = ibase;
+  uint8_t* ip_bound = ibase + length - 1;
+  uint8_t* ip_limit = ibase + length - 12;
+  uint8_t* op = (uint8_t*)output;
+  const uint8_t* op_limit = op + maxout;
+  uint32_t seq;
+  uint8_t copy;
+  uint32_t hval;
+
+  /* input and output buffers cannot be less than 16 and 66 bytes or we can get into trouble */
+  if (length < 16 || maxout < 66) {
+    return 0;
+  }
+
+  // Initialize the hash table
+  uint32_t htab[1U << (uint8_t)HASH_LOG];
+  memset(htab, 0, (1U << hashlog) * sizeof(uint32_t));
+
+  /* we start with literal copy */
+  copy = 4;
+  *op++ = MAX_COPY - 1;
+  *op++ = *ip++;
+  *op++ = *ip++;
+  *op++ = *ip++;
+  *op++ = *ip++;
+
+  /* main loop */
+  while (BLOSCLZ_LIKELY(ip < ip_limit)) {
+    const uint8_t* ref;
+    unsigned distance;
+    uint8_t* anchor = ip;  /* comparison starting-point */
+
+    /* find potential match */
+    seq = BLOSCLZ_READU32(ip);
+    HASH_FUNCTION(hval, seq, hashlog)
+    ref = ibase + htab[hval];
+
+    /* calculate distance to the match */
+    distance = (unsigned int)(anchor - ref);
+
+    /* update hash table */
+    htab[hval] = (uint32_t) (anchor - ibase);
+
+    if (distance == 0 || (distance >= MAX_FARDISTANCE)) {
+      LITERAL(ip, op, op_limit, anchor, copy)
+      continue;
+    }
+
+    /* is this a match? check the first 4 bytes */
+    if (BLOSCLZ_UNLIKELY(BLOSCLZ_READU32(ref) == BLOSCLZ_READU32(ip))) {
+      ref += 4;
+    } else {
+      /* no luck, copy as a literal */
+      LITERAL(ip, op, op_limit, anchor, copy)
+      continue;
+    }
+
+    /* last matched byte */
+    ip = anchor + 4;
+
+    /* distance is biased */
+    distance--;
+
+    /* get runs or matches; zero distance means a run */
+    ip = get_run_or_match(ip, ip_bound, ref, !distance);
+
+    /* length is biased, '1' means a match of 3 bytes */
+    ip -= ipshift;
+
+    unsigned len = (int)(ip - anchor);
+
+    // Encoding short lengths is expensive during decompression
+    if (len < minlen || (len <= 5 && distance >= MAX_DISTANCE)) {
+      LITERAL(ip, op, op_limit, anchor, copy)
+      continue;
+    }
+
+    /* if we have copied something, adjust the copy count */
+    if (copy)
+      /* copy is biased, '0' means 1 byte copy */
+      *(op - copy - 1) = (uint8_t)(copy - 1);
+    else
+      /* back, to overwrite the copy count */
+      op--;
+    /* reset literal counter */
+    copy = 0;
+
+    /* encode the match */
+    if (distance < MAX_DISTANCE) {
+      if (len < 7) {
+        MATCH_SHORT(op, op_limit, len, distance)
+      } else {
+        MATCH_LONG(op, op_limit, len, distance)
+      }
+    } else {
+      /* far away, but not yet in another galaxy... */
+      distance -= MAX_DISTANCE;
+      if (len < 7) {
+        MATCH_SHORT_FAR(op, op_limit, len, distance)
+      } else {
+        MATCH_LONG_FAR(op, op_limit, len, distance)
+      }
+    }
+
+    /* update the hash at match boundary */
+    seq = BLOSCLZ_READU32(ip);
+    HASH_FUNCTION(hval, seq, hashlog)
+    htab[hval] = (uint32_t) (ip++ - ibase);
+    if (clevel == 9) {
+      // In some situations, including a second hash proves to be useful,
+      // but not in others. Activating it here at max clevel only.
+      seq >>= 8U;
+      HASH_FUNCTION(hval, seq, hashlog)
+      htab[hval] = (uint32_t) (ip++ - ibase);
+    }
+    else {
+      ip++;
+    }
+
+    if (BLOSCLZ_UNLIKELY(op + 1 > op_limit))
+      goto out;
+
+    /* assuming literal copy */
+    *op++ = MAX_COPY - 1;
+  }
+
+  /* left-over as literal copy */
+  while (BLOSCLZ_UNLIKELY(ip <= ip_bound)) {
+    if (BLOSCLZ_UNLIKELY(op + 2 > op_limit)) goto out;
+    *op++ = *ip++;
+    copy++;
+    if (BLOSCLZ_UNLIKELY(copy == MAX_COPY)) {
+      copy = 0;
+      *op++ = MAX_COPY - 1;
+    }
+  }
+
+  /* if we have copied something, adjust the copy length */
+  if (copy)
+    *(op - copy - 1) = (uint8_t)(copy - 1);
+  else
+    op--;
+
+  /* marker for blosclz */
+  *(uint8_t*)output |= (1U << 5U);
+
+  return (int)(op - (uint8_t*)output);
+
+  out:
+  return 0;
+}
+
+// See https://habr.com/en/company/yandex/blog/457612/
+#if defined(__AVX2__)
+
+#if defined(_MSC_VER)
+#define ALIGNED_(x) __declspec(align(x))
+#else
+#if defined(__GNUC__)
+#define ALIGNED_(x) __attribute__ ((aligned(x)))
+#endif
+#endif
+#define ALIGNED_TYPE_(t, x) t ALIGNED_(x)
+
+static unsigned char* copy_match_16(unsigned char *op, const unsigned char *match, int32_t len)
+{
+  size_t offset = op - match;
+  while (len >= 16) {
+
+    static const ALIGNED_TYPE_(uint8_t, 16) masks[] =
+    {
+      0, 1, 2, 1, 4, 1, 4, 2, 8, 7, 6, 5, 4, 3, 2, 1, // offset = 0, not used as mask, but for shift
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // offset = 1
+      0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
+      0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0,
+      0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3,
+      0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0,
+      0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
+      0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1,
+      0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0,
+      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // offset = 16
+    };
+
+    _mm_storeu_si128((__m128i *)(op),
+                     _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(match)),
+                                      _mm_load_si128((const __m128i *)(masks) + offset)));
+
+    match += masks[offset];
+
+    op += 16;
+    len -= 16;
+  }
+  // Deal with remainders
+  for (; len > 0; len--) {
+    *op++ = *match++;
+  }
+  return op;
+}
+#endif
+
+// LZ4 wildCopy which can reach excellent copy bandwidth (even if insecure)
+static inline void wild_copy(uint8_t *out, const uint8_t* from, uint8_t* end) {
+  uint8_t* d = out;
+  const uint8_t* s = from;
+  uint8_t* const e = end;
+
+  do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
+}
+
+int blosclz_decompress(const void* input, int length, void* output, int maxout) {
+  const uint8_t* ip = (const uint8_t*)input;
+  const uint8_t* ip_limit = ip + length;
+  uint8_t* op = (uint8_t*)output;
+  uint32_t ctrl;
+  uint8_t* op_limit = op + maxout;
+  if (BLOSCLZ_UNLIKELY(length == 0)) {
+    return 0;
+  }
+  ctrl = (*ip++) & 31U;
+
+  while (1) {
+    if (ctrl >= 32) {
+      // match
+      int32_t len = (int32_t)(ctrl >> 5U) - 1;
+      int32_t ofs = (int32_t)(ctrl & 31U) << 8U;
+      uint8_t code;
+      const uint8_t* ref = op - ofs;
+
+      if (len == 7 - 1) {
+        do {
+          if (BLOSCLZ_UNLIKELY(ip + 1 >= ip_limit)) {
+            return 0;
+          }
+          code = *ip++;
+          len += code;
+        } while (code == 255);
+      }
+      else {
+        if (BLOSCLZ_UNLIKELY(ip + 1 >= ip_limit)) {
+          return 0;
+        }
+      }
+      code = *ip++;
+      len += 3;
+      ref -= code;
+
+      /* match from 16-bit distance */
+      if (BLOSCLZ_UNLIKELY(code == 255)) {
+        if (ofs == (31U << 8U)) {
+          if (ip + 1 >= ip_limit) {
+            return 0;
+          }
+          ofs = (*ip++) << 8U;
+          ofs += *ip++;
+          ref = op - ofs - MAX_DISTANCE;
+        }
+      }
+
+      if (BLOSCLZ_UNLIKELY(op + len > op_limit)) {
+        return 0;
+      }
+
+      if (BLOSCLZ_UNLIKELY(ref - 1 < (uint8_t*)output)) {
+        return 0;
+      }
+
+      if (BLOSCLZ_UNLIKELY(ip >= ip_limit)) break;
+      ctrl = *ip++;
+
+      ref--;
+      if (ref == op - 1) {
+        /* optimized copy for a run */
+        memset(op, *ref, len);
+        op += len;
+      }
+      else if ((op - ref >= 8) && (op_limit - op >= len + 8)) {
+        // copy with an overlap not larger than 8
+        wild_copy(op, ref, op + len);
+        op += len;
+      }
+      else {
+        // general copy with any overlap
+#if defined(__AVX2__)
+        if (op - ref <= 16) {
+          // This is not faster on a combination of compilers (clang, gcc, icc) or machines, but
+          // it is not slower either. Let's activate it here for experimentation.
+          op = copy_match_16(op, ref, len);
+        }
+        else {
+#endif
+          op = copy_match(op, ref, (unsigned) len);
+#if defined(__AVX2__)
+        }
+#endif
+      }
+    }
+    else {
+      // literal
+      ctrl++;
+      if (BLOSCLZ_UNLIKELY(op + ctrl > op_limit)) {
+        return 0;
+      }
+      if (BLOSCLZ_UNLIKELY(ip + ctrl > ip_limit)) {
+        return 0;
+      }
+
+      memcpy(op, ip, ctrl); op += ctrl; ip += ctrl;
+      // On GCC 6, fastcopy is still faster than plain memcpy.
+      // However, with recent CLANG/LLVM 9.0, there is almost no difference
+      // in performance.
+      // And starting with CLANG/LLVM 10 and GCC 9, memcpy is generally faster.
+      // op = fastcopy(op, ip, (unsigned) ctrl); ip += ctrl;
+
+      if (BLOSCLZ_UNLIKELY(ip >= ip_limit)) break;
+      ctrl = *ip++;
+    }
+  }
+
+  return (int)(op - (uint8_t*)output);
+}
diff --git a/c-blosc/blosc/blosclz.h b/c-blosc/blosc/blosclz.h
new file mode 100644
index 0000000..9b77f2a
--- /dev/null
+++ b/c-blosc/blosc/blosclz.h
@@ -0,0 +1,69 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Copyright (c) 2021  The Blosc Developers <blosc@blosc.org>
+  https://blosc.org
+  License: BSD 3-Clause (see LICENSE.txt)
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+
+/*********************************************************************
+  The code in this file is heavily based on FastLZ, a lightning-fast
+  lossless compression library. See LICENSES/FASTLZ.txt for details
+  about copyright and rights to use.
+**********************************************************************/
+
+
+#ifndef BLOSCLZ_H
+#define BLOSCLZ_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#define BLOSCLZ_VERSION_STRING "2.5.1"
+
+
+/**
+  Compress a block of data in the input buffer and return the size of
+  the compressed block. The size of the input buffer is specified by
+  length. The minimum input buffer size is 16.
+
+  The output buffer must be at least 5% larger than the input buffer
+  and cannot be smaller than 66 bytes.
+
+  If the input is not compressible, or the output does not fit in maxout
+  bytes, the return value will be 0 and you will have to discard the
+  output buffer.
+
+  The acceleration parameter is related to the frequency of updating
+  the internal hash. An acceleration of 1 means that the internal hash
+  is updated at full rate. A value < 1 is not allowed and will be
+  silently set to 1.
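+
+  A minimal round-trip sketch (illustrative only: buffer names, sizes and
+  the clevel/split_block values are arbitrary, and error handling is
+  elided):
+
+    char in[4096];                    // filled with compressible data
+    char out[4096 + 4096 / 16 + 66];  // ~5% headroom, at least 66 bytes
+    char back[4096];
+    int csize = blosclz_compress(5, in, sizeof(in), out, sizeof(out), 1);
+    if (csize > 0) {                  // 0 means incompressible: keep in
+      int dsize = blosclz_decompress(out, csize, back, sizeof(back));
+      // dsize == sizeof(in) on success, 0 on error
+    }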
+
+  The input buffer and the output buffer cannot overlap.
+*/
+
+int blosclz_compress(int opt_level, const void* input, int length,
+                     void* output, int maxout, int split_block);
+
+/**
+  Decompress a block of compressed data and return the size of the
+  decompressed block. If an error occurs, e.g. the compressed data is
+  corrupted or the output buffer is not large enough, then 0 (zero)
+  will be returned instead.
+
+  The input buffer and the output buffer cannot overlap.
+
+  Decompression is memory safe and guaranteed not to write more than
+  maxout bytes to the output buffer.
+ */
+
+int blosclz_decompress(const void* input, int length, void* output, int maxout);
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* BLOSCLZ_H */
diff --git a/c-blosc/blosc/fastcopy.c b/c-blosc/blosc/fastcopy.c
new file mode 100644
index 0000000..2a1c0a0
--- /dev/null
+++ b/c-blosc/blosc/fastcopy.c
@@ -0,0 +1,639 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted <francesc@blosc.org>
+  Creation date: 2018-01-03
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+
+/*********************************************************************
+  The code in this file is heavily based on memcopy.h, from the
+  zlib-ng compression library. See LICENSES/ZLIB-NG.txt for details.
+  See also: https://github.com/zlib-ng/zlib-ng/blob/develop/zlib.h.in
+
+  New implementations by Francesc Alted:
+  * fast_copy() and copy_run() functions
+  * Support for SSE2/AVX2 copy instructions for these routines
+**********************************************************************/
+
+#include <assert.h>
+#include "blosc-common.h"
+
+/*
+ * Use inlined functions for supported systems.
+ */ +#if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */ +#define inline __inline /* Visual C is not C99, but supports some kind of inline */ +#endif + + +static inline unsigned char *copy_1_bytes(unsigned char *out, const unsigned char *from) { + *out++ = *from; + return out; +} + +static inline unsigned char *copy_2_bytes(unsigned char *out, const unsigned char *from) { +#if defined(BLOSC_STRICT_ALIGN) + uint16_t chunk; + memcpy(&chunk, from, 2); + memcpy(out, &chunk, 2); +#else + *(uint16_t *) out = *(uint16_t *) from; +#endif + return out + 2; +} + +static inline unsigned char *copy_3_bytes(unsigned char *out, const unsigned char *from) { + out = copy_1_bytes(out, from); + return copy_2_bytes(out, from + 1); +} + +static inline unsigned char *copy_4_bytes(unsigned char *out, const unsigned char *from) { +#if defined(BLOSC_STRICT_ALIGN) + uint32_t chunk; + memcpy(&chunk, from, 4); + memcpy(out, &chunk, 4); +#else + *(uint32_t *) out = *(uint32_t *) from; +#endif + return out + 4; +} + +static inline unsigned char *copy_5_bytes(unsigned char *out, const unsigned char *from) { + out = copy_1_bytes(out, from); + return copy_4_bytes(out, from + 1); +} + +static inline unsigned char *copy_6_bytes(unsigned char *out, const unsigned char *from) { + out = copy_2_bytes(out, from); + return copy_4_bytes(out, from + 2); +} + +static inline unsigned char *copy_7_bytes(unsigned char *out, const unsigned char *from) { + out = copy_3_bytes(out, from); + return copy_4_bytes(out, from + 3); +} + +static inline unsigned char *copy_8_bytes(unsigned char *out, const unsigned char *from) { +#if defined(BLOSC_STRICT_ALIGN) + uint64_t chunk; + memcpy(&chunk, from, 8); + memcpy(out, &chunk, 8); +#else + *(uint64_t *) out = *(uint64_t *) from; +#endif + return out + 8; +} + + +static inline unsigned char *copy_16_bytes(unsigned char *out, const unsigned char *from) { +#if defined(__SSE2__) + __m128i chunk; + chunk = _mm_loadu_si128((__m128i*)from); + _mm_storeu_si128((__m128i*)out, chunk); + out += 16; +#elif !defined(BLOSC_STRICT_ALIGN) + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; +#else + int i; + for (i = 0; i < 16; i++) { + *out++ = *from++; + } +#endif + return out; +} + +static inline unsigned char *copy_32_bytes(unsigned char *out, const unsigned char *from) { +#if defined(__AVX2__) + __m256i chunk; + chunk = _mm256_loadu_si256((__m256i*)from); + _mm256_storeu_si256((__m256i*)out, chunk); + out += 32; +#elif defined(__SSE2__) + __m128i chunk; + chunk = _mm_loadu_si128((__m128i*)from); + _mm_storeu_si128((__m128i*)out, chunk); + from += 16; out += 16; + chunk = _mm_loadu_si128((__m128i*)from); + _mm_storeu_si128((__m128i*)out, chunk); + out += 16; +#elif !defined(BLOSC_STRICT_ALIGN) + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; + *(uint64_t*)out = *(uint64_t*)from; + from += 8; out += 8; +#else + int i; + for (i = 0; i < 32; i++) { + *out++ = *from++; + } +#endif + return out; +} + +// This is never used, so comment it out +//#if defined(__AVX2__) +//static inline unsigned char *copy_32_bytes_aligned(unsigned char *out, const unsigned char *from) { +// __m256i chunk; +// chunk = _mm256_load_si256((__m256i*)from); +// _mm256_storeu_si256((__m256i*)out, chunk); +// return out + 32; +//} +//#endif // __AVX2__ + +/* Copy LEN bytes (7 or fewer) from FROM into OUT. Return OUT + LEN. 
*/ +static inline unsigned char *copy_bytes(unsigned char *out, const unsigned char *from, unsigned len) { + assert(len < 8); + +#ifdef BLOSC_STRICT_ALIGN + while (len--) { + *out++ = *from++; + } +#else + switch (len) { + case 7: + return copy_7_bytes(out, from); + case 6: + return copy_6_bytes(out, from); + case 5: + return copy_5_bytes(out, from); + case 4: + return copy_4_bytes(out, from); + case 3: + return copy_3_bytes(out, from); + case 2: + return copy_2_bytes(out, from); + case 1: + return copy_1_bytes(out, from); + case 0: + return out; + default: + assert(0); + } +#endif /* BLOSC_STRICT_ALIGN */ + return out; +} + +// Define a symbol for avoiding fall-through warnings emitted by gcc >= 7.0 +#if ((defined(__GNUC__) && BLOSC_GCC_VERSION >= 700) && !defined(__clang__) && \ + !defined(__ICC) && !defined(__ICL)) +#define AVOID_FALLTHROUGH_WARNING +#endif + +/* Byte by byte semantics: copy LEN bytes from FROM and write them to OUT. Return OUT + LEN. */ +static inline unsigned char *chunk_memcpy(unsigned char *out, const unsigned char *from, unsigned len) { + unsigned sz = sizeof(uint64_t); + unsigned rem = len % sz; + unsigned by8; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. */ + copy_8_bytes(out, from); + + len /= sz; + out += rem; + from += rem; + + by8 = len % 8; + len -= by8; + switch (by8) { + case 7: + out = copy_8_bytes(out, from); + from += sz; + #ifdef AVOID_FALLTHROUGH_WARNING + __attribute__ ((fallthrough)); // Shut-up -Wimplicit-fallthrough warning in GCC + #endif + case 6: + out = copy_8_bytes(out, from); + from += sz; + #ifdef AVOID_FALLTHROUGH_WARNING + __attribute__ ((fallthrough)); + #endif + case 5: + out = copy_8_bytes(out, from); + from += sz; + #ifdef AVOID_FALLTHROUGH_WARNING + __attribute__ ((fallthrough)); + #endif + case 4: + out = copy_8_bytes(out, from); + from += sz; + #ifdef AVOID_FALLTHROUGH_WARNING + __attribute__ ((fallthrough)); + #endif + case 3: + out = copy_8_bytes(out, from); + from += sz; + #ifdef AVOID_FALLTHROUGH_WARNING + __attribute__ ((fallthrough)); + #endif + case 2: + out = copy_8_bytes(out, from); + from += sz; + #ifdef AVOID_FALLTHROUGH_WARNING + __attribute__ ((fallthrough)); + #endif + case 1: + out = copy_8_bytes(out, from); + from += sz; + #ifdef AVOID_FALLTHROUGH_WARNING + __attribute__ ((fallthrough)); + #endif + default: + break; + } + + while (len) { + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + out = copy_8_bytes(out, from); + from += sz; + + len -= 8; + } + + return out; +} + +#if (defined(__SSE2__) && defined(__AVX2__)) +/* 16-byte version of chunk_memcpy() */ +static inline unsigned char *chunk_memcpy_16(unsigned char *out, const unsigned char *from, unsigned len) { + unsigned sz = 16; + unsigned rem = len % sz; + unsigned ilen; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. 
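+     (E.g., for len == 37: rem == 5, so the head copy covers bytes 0..15,
+     both pointers then advance by 5, and the loop copies bytes 5..36 in two
+     16-byte chunks; bytes 5..15 are written twice, which is harmless.)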
*/ + copy_16_bytes(out, from); + + len /= sz; + out += rem; + from += rem; + + for (ilen = 0; ilen < len; ilen++) { + copy_16_bytes(out, from); + out += sz; + from += sz; + } + + return out; +} +#endif + + +// NOTE: chunk_memcpy_32() and chunk_memcpy_32_unrolled() are not used, so commenting them + +///* 32-byte version of chunk_memcpy() */ +//static inline unsigned char *chunk_memcpy_32(unsigned char *out, const unsigned char *from, unsigned len) { +// unsigned sz = 32; +// unsigned rem = len % sz; +// unsigned ilen; +// +// assert(len >= sz); +// +// /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. */ +// copy_32_bytes(out, from); +// +// len /= sz; +// out += rem; +// from += rem; +// +// for (ilen = 0; ilen < len; ilen++) { +// copy_32_bytes(out, from); +// out += sz; +// from += sz; +// } +// +// return out; +//} +// +///* 32-byte *unrolled* version of chunk_memcpy() */ +//static inline unsigned char *chunk_memcpy_32_unrolled(unsigned char *out, const unsigned char *from, unsigned len) { +// unsigned sz = 32; +// unsigned rem = len % sz; +// unsigned by8; +// +// assert(len >= sz); +// +// /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. */ +// copy_32_bytes(out, from); +// +// len /= sz; +// out += rem; +// from += rem; +// +// by8 = len % 8; +// len -= by8; +// switch (by8) { +// case 7: +// out = copy_32_bytes(out, from); +// from += sz; +// case 6: +// out = copy_32_bytes(out, from); +// from += sz; +// case 5: +// out = copy_32_bytes(out, from); +// from += sz; +// case 4: +// out = copy_32_bytes(out, from); +// from += sz; +// case 3: +// out = copy_32_bytes(out, from); +// from += sz; +// case 2: +// out = copy_32_bytes(out, from); +// from += sz; +// case 1: +// out = copy_32_bytes(out, from); +// from += sz; +// default: +// break; +// } +// +// while (len) { +// out = copy_32_bytes(out, from); +// from += sz; +// out = copy_32_bytes(out, from); +// from += sz; +// out = copy_32_bytes(out, from); +// from += sz; +// out = copy_32_bytes(out, from); +// from += sz; +// out = copy_32_bytes(out, from); +// from += sz; +// out = copy_32_bytes(out, from); +// from += sz; +// out = copy_32_bytes(out, from); +// from += sz; +// out = copy_32_bytes(out, from); +// from += sz; +// +// len -= 8; +// } +// +// return out; +//} + + +/* SSE2/AVX2 *unaligned* version of chunk_memcpy() */ +#if defined(__SSE2__) || defined(__AVX2__) +static inline unsigned char *chunk_memcpy_unaligned(unsigned char *out, const unsigned char *from, unsigned len) { +#if defined(__AVX2__) + unsigned sz = sizeof(__m256i); +#elif defined(__SSE2__) + unsigned sz = sizeof(__m128i); +#endif + unsigned rem = len % sz; + unsigned ilen; + + assert(len >= sz); + + /* Copy a few bytes to make sure the loop below has a multiple of SZ bytes to be copied. 
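+     (Here SZ is sizeof(__m256i) == 32 with AVX2, else sizeof(__m128i) == 16
+     with SSE2; the scheme is the same as in chunk_memcpy_16() above.)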
*/ +#if defined(__AVX2__) + copy_32_bytes(out, from); +#elif defined(__SSE2__) + copy_16_bytes(out, from); +#endif + + len /= sz; + out += rem; + from += rem; + + for (ilen = 0; ilen < len; ilen++) { +#if defined(__AVX2__) + copy_32_bytes(out, from); +#elif defined(__SSE2__) + copy_16_bytes(out, from); +#endif + out += sz; + from += sz; + } + + return out; +} +#endif // __SSE2__ || __AVX2__ + + +// NOTE: chunk_memcpy_aligned() is not used, so commenting it + +//#if defined(__SSE2__) || defined(__AVX2__) +///* SSE2/AVX2 *aligned* version of chunk_memcpy() */ +//static inline unsigned char *chunk_memcpy_aligned(unsigned char *out, const unsigned char *from, unsigned len) { +//#if defined(__AVX2__) +// unsigned sz = sizeof(__m256i); +// __m256i chunk; +//#elif defined(__SSE2__) +// unsigned sz = sizeof(__m128i); +// __m128i chunk; +//#endif +// unsigned bytes_to_align = sz - (unsigned)(((uintptr_t)(const void *)(from)) % sz); +// unsigned corrected_len = len - bytes_to_align; +// unsigned rem = corrected_len % sz; +// unsigned ilen; +// +// assert(len >= sz); +// +// /* Copy a few bytes to make sure the loop below has aligned access. */ +//#if defined(__AVX2__) +// chunk = _mm256_loadu_si256((__m256i *) from); +// _mm256_storeu_si256((__m256i *) out, chunk); +//#elif defined(__SSE2__) +// chunk = _mm_loadu_si128((__m128i *) from); +// _mm_storeu_si128((__m128i *) out, chunk); +//#endif +// out += bytes_to_align; +// from += bytes_to_align; +// +// len = corrected_len / sz; +// for (ilen = 0; ilen < len; ilen++) { +//#if defined(__AVX2__) +// chunk = _mm256_load_si256((__m256i *) from); /* *aligned* load */ +// _mm256_storeu_si256((__m256i *) out, chunk); +//#elif defined(__SSE2__) +// chunk = _mm_load_si128((__m128i *) from); /* *aligned* load */ +// _mm_storeu_si128((__m128i *) out, chunk); +//#endif +// out += sz; +// from += sz; +// } +// +// /* Copy remaining bytes */ +// if (rem < 8) { +// out = copy_bytes(out, from, rem); +// } +// else { +// out = chunk_memcpy(out, from, rem); +// } +// +// return out; +//} +//#endif // __AVX2__ || __SSE2__ + + +/* Byte by byte semantics: copy LEN bytes from FROM and write them to OUT. Return OUT + LEN. 
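+   Dispatch is by size class: exact 8/16/32-byte requests use a single
+   fixed-size copy, len < 8 goes to copy_bytes(), and anything larger falls
+   through to the chunked copiers (e.g., with AVX2 enabled a 24-byte request
+   is served by chunk_memcpy_16()).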
*/ +unsigned char *fastcopy(unsigned char *out, const unsigned char *from, unsigned len) { + switch (len) { + case 32: + return copy_32_bytes(out, from); + case 16: + return copy_16_bytes(out, from); + case 8: + return copy_8_bytes(out, from); + default: { + } + } + if (len < 8) { + return copy_bytes(out, from, len); + } +#if defined(__SSE2__) + if (len < 16) { + return chunk_memcpy(out, from, len); + } +#if !defined(__AVX2__) + return chunk_memcpy_unaligned(out, from, len); +#else + if (len < 32) { + return chunk_memcpy_16(out, from, len); + } + return chunk_memcpy_unaligned(out, from, len); +#endif // !__AVX2__ +#else + return chunk_memcpy(out, from, len); +#endif // __SSE2__ +} + + +/* Copy a run */ +unsigned char* copy_match(unsigned char *out, const unsigned char *from, unsigned len) { +#if defined(__AVX2__) + unsigned sz = sizeof(__m256i); +#elif defined(__SSE2__) + unsigned sz = sizeof(__m128i); +#else + unsigned sz = sizeof(uint64_t); +#endif + +#if ((defined(__GNUC__) && BLOSC_GCC_VERSION < 800) && !defined(__clang__) && !defined(__ICC) && !defined(__ICL)) + // GCC < 8 in fully optimization mode seems to have problems with the code further below so stop here + for (; len > 0; len--) { + *out++ = *from++; + } + return out; +#endif + + // If out and from are away more than the size of the copy, then a fastcopy is safe + unsigned overlap_dist = (unsigned) (out - from); + if (overlap_dist > sz) { + return fastcopy(out, from, len); + } + + // Otherwise we need to be more careful so as not to overwrite destination + switch (overlap_dist) { + case 32: + for (; len >= 32; len -= 32) { + out = copy_32_bytes(out, from); + } + break; + case 30: + for (; len >= 30; len -= 30) { + out = copy_16_bytes(out, from); + out = copy_8_bytes(out, from + 16); + out = copy_4_bytes(out, from + 24); + out = copy_2_bytes(out, from + 28); + } + break; + case 28: + for (; len >= 28; len -= 28) { + out = copy_16_bytes(out, from); + out = copy_8_bytes(out, from + 16); + out = copy_4_bytes(out, from + 24); + } + break; + case 26: + for (; len >= 26; len -= 26) { + out = copy_16_bytes(out, from); + out = copy_8_bytes(out, from + 16); + out = copy_2_bytes(out, from + 24); + } + break; + case 24: + for (; len >= 24; len -= 24) { + out = copy_16_bytes(out, from); + out = copy_8_bytes(out, from + 16); + } + break; + case 22: + for (; len >= 22; len -= 22) { + out = copy_16_bytes(out, from); + out = copy_4_bytes(out, from + 16); + out = copy_2_bytes(out, from + 20); + } + break; + case 20: + for (; len >= 20; len -= 20) { + out = copy_16_bytes(out, from); + out = copy_4_bytes(out, from + 16); + } + break; + case 18: + for (; len >= 18; len -= 18) { + out = copy_16_bytes(out, from); + out = copy_2_bytes(out, from + 16); + } + break; + case 16: + for (; len >= 16; len -= 16) { + out = copy_16_bytes(out, from); + } + break; + case 8: + for (; len >= 8; len -= 8) { + out = copy_8_bytes(out, from); + } + break; + case 4: + for (; len >= 4; len -= 4) { + out = copy_4_bytes(out, from); + } + break; + case 2: + for (; len >= 2; len -= 2) { + out = copy_2_bytes(out, from); + } + break; + default: + for (; len > 0; len--) { + *out++ = *from++; + } + } + + // Copy the leftovers + for (; len > 0; len--) { + *out++ = *from++; + } + + return out; +} diff --git a/c-blosc/blosc/fastcopy.h b/c-blosc/blosc/fastcopy.h new file mode 100644 index 0000000..3dfe634 --- /dev/null +++ b/c-blosc/blosc/fastcopy.h @@ -0,0 +1,19 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression 
Library
+
+  Author: Francesc Alted <francesc@blosc.org>
+  Creation date: 2018-01-03
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+
+#ifndef BLOSC_FASTCOPY_H
+#define BLOSC_FASTCOPY_H
+
+/* Same semantics as memcpy() */
+unsigned char *fastcopy(unsigned char *out, const unsigned char *from, unsigned len);
+
+/* Same as fastcopy() but without overwriting origin or destination when they overlap */
+unsigned char* copy_match(unsigned char *out, const unsigned char *from, unsigned len);
+
+#endif //BLOSC_FASTCOPY_H
diff --git a/c-blosc/blosc/shuffle-avx2.c b/c-blosc/blosc/shuffle-avx2.c
new file mode 100644
index 0000000..3f78690
--- /dev/null
+++ b/c-blosc/blosc/shuffle-avx2.c
@@ -0,0 +1,772 @@
+/*********************************************************************
+  Blosc - Blocked Shuffling and Compression Library
+
+  Author: Francesc Alted <francesc@blosc.org>
+
+  See LICENSE.txt for details about copyright and rights to use.
+**********************************************************************/
+
+#include "shuffle-generic.h"
+#include "shuffle-avx2.h"
+
+/* Define dummy functions if AVX2 is not available for the compilation target and compiler. */
+#if !defined(__AVX2__)
+#include <stdlib.h>
+
+void
+blosc_internal_shuffle_avx2(const size_t bytesoftype, const size_t blocksize,
+                            const uint8_t* const _src, uint8_t* const _dest) {
+  abort();
+}
+
+void
+blosc_internal_unshuffle_avx2(const size_t bytesoftype, const size_t blocksize,
+                              const uint8_t* const _src, uint8_t* const _dest) {
+  abort();
+}
+
+#else /* defined(__AVX2__) */
+
+#include <immintrin.h>
+
+
+/* The next is useful for debugging purposes */
+#if 0
+#include <stdio.h>
+#include <string.h>
+
+static void printymm(__m256i ymm0)
+{
+  uint8_t buf[32];
+
+  ((__m256i *)buf)[0] = ymm0;
+  printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,"
+         "%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n",
+         buf[0], buf[1], buf[2], buf[3],
+         buf[4], buf[5], buf[6], buf[7],
+         buf[8], buf[9], buf[10], buf[11],
+         buf[12], buf[13], buf[14], buf[15],
+         buf[16], buf[17], buf[18], buf[19],
+         buf[20], buf[21], buf[22], buf[23],
+         buf[24], buf[25], buf[26], buf[27],
+         buf[28], buf[29], buf[30], buf[31]);
+}
+#endif
+
+/* GCC doesn't include the split load/store intrinsics
+   needed for the tiled shuffle, so define them here. */
+#if defined(__GNUC__) && !defined(__clang__) && !defined(__ICC)
+static inline __m256i
+__attribute__((__always_inline__))
+_mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr)
+{
+  return _mm256_inserti128_si256(
+    _mm256_castsi128_si256(_mm_loadu_si128(loaddr)), _mm_loadu_si128(hiaddr), 1);
+}
+
+static inline void
+__attribute__((__always_inline__))
+_mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a)
+{
+  _mm_storeu_si128(loaddr, _mm256_castsi256_si128(a));
+  _mm_storeu_si128(hiaddr, _mm256_extracti128_si256(a, 1));
+}
+#endif /* defined(__GNUC__) */
+
+/* Routine optimized for shuffling a buffer for a type size of 2 bytes. */
+static void
+shuffle2_avx2(uint8_t* const dest, const uint8_t* const src,
+              const size_t vectorizable_elements, const size_t total_elements)
+{
+  static const size_t bytesoftype = 2;
+  size_t j;
+  int k;
+  __m256i ymm0[2], ymm1[2];
+
+  /* Create the shuffle mask.
+     NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from
+     most to least significant (i.e., their order is reversed when compared to
+     loading the mask from an array).
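+     (E.g., 0x0f, the first argument to _mm256_set_epi8() below, lands in
+     byte 31 of the register, not in byte 0.)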
*/ + const __m256i shmask = _mm256_set_epi8( + 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, + 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02, 0x00, + 0x0f, 0x0d, 0x0b, 0x09, 0x07, 0x05, 0x03, 0x01, + 0x0e, 0x0c, 0x0a, 0x08, 0x06, 0x04, 0x02, 0x00); + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Fetch 32 elements (64 bytes) then transpose bytes, words and double words. */ + for (k = 0; k < 2; k++) { + ymm0[k] = _mm256_loadu_si256((__m256i*)(src + (j * bytesoftype) + (k * sizeof(__m256i)))); + ymm1[k] = _mm256_shuffle_epi8(ymm0[k], shmask); + } + + ymm0[0] = _mm256_permute4x64_epi64(ymm1[0], 0xd8); + ymm0[1] = _mm256_permute4x64_epi64(ymm1[1], 0x8d); + + ymm1[0] = _mm256_blend_epi32(ymm0[0], ymm0[1], 0xf0); + ymm0[1] = _mm256_blend_epi32(ymm0[0], ymm0[1], 0x0f); + ymm1[1] = _mm256_permute4x64_epi64(ymm0[1], 0x4e); + + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 2; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (k * total_elements)), ymm1[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 4 bytes. */ +static void +shuffle4_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + __m256i ymm0[4], ymm1[4]; + + /* Create the shuffle mask. + NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from + most to least significant (i.e., their order is reversed when compared to + loading the mask from an array). */ + const __m256i mask = _mm256_set_epi32( + 0x07, 0x03, 0x06, 0x02, 0x05, 0x01, 0x04, 0x00); + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Fetch 32 elements (128 bytes) then transpose bytes and words. */ + for (j = 0; j < 4; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src + (i * bytesoftype) + (j * sizeof(__m256i)))); + ymm1[j] = _mm256_shuffle_epi32(ymm0[j], 0xd8); + ymm0[j] = _mm256_shuffle_epi32(ymm0[j], 0x8d); + ymm0[j] = _mm256_unpacklo_epi8(ymm1[j], ymm0[j]); + ymm1[j] = _mm256_shuffle_epi32(ymm0[j], 0x04e); + ymm0[j] = _mm256_unpacklo_epi16(ymm0[j], ymm1[j]); + } + /* Transpose double words */ + for (j = 0; j < 2; j++) { + ymm1[j*2] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + ymm1[j*2+1] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + /* Transpose quad words */ + for (j = 0; j < 2; j++) { + ymm0[j*2] = _mm256_unpacklo_epi64(ymm1[j], ymm1[j+2]); + ymm0[j*2+1] = _mm256_unpackhi_epi64(ymm1[j], ymm1[j+2]); + } + for (j = 0; j < 4; j++) { + ymm0[j] = _mm256_permutevar8x32_epi32(ymm0[j], mask); + } + /* Store the result vectors */ + uint8_t* const dest_for_ith_element = dest + i; + for (j = 0; j < 4; j++) { + _mm256_storeu_si256((__m256i*)(dest_for_ith_element + (j * total_elements)), ymm0[j]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 8 bytes. */ +static void +shuffle8_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t j; + int k, l; + __m256i ymm0[8], ymm1[8]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Fetch 32 elements (256 bytes) then transpose bytes. 
*/ + for (k = 0; k < 8; k++) { + ymm0[k] = _mm256_loadu_si256((__m256i*)(src + (j * bytesoftype) + (k * sizeof(__m256i)))); + ymm1[k] = _mm256_shuffle_epi32(ymm0[k], 0x4e); + ymm1[k] = _mm256_unpacklo_epi8(ymm0[k], ymm1[k]); + } + /* Transpose words */ + for (k = 0, l = 0; k < 4; k++, l +=2) { + ymm0[k*2] = _mm256_unpacklo_epi16(ymm1[l], ymm1[l+1]); + ymm0[k*2+1] = _mm256_unpackhi_epi16(ymm1[l], ymm1[l+1]); + } + /* Transpose double words */ + for (k = 0, l = 0; k < 4; k++, l++) { + if (k == 2) l += 2; + ymm1[k*2] = _mm256_unpacklo_epi32(ymm0[l], ymm0[l+2]); + ymm1[k*2+1] = _mm256_unpackhi_epi32(ymm0[l], ymm0[l+2]); + } + /* Transpose quad words */ + for (k = 0; k < 4; k++) { + ymm0[k*2] = _mm256_unpacklo_epi64(ymm1[k], ymm1[k+4]); + ymm0[k*2+1] = _mm256_unpackhi_epi64(ymm1[k], ymm1[k+4]); + } + for(k = 0; k < 8; k++) { + ymm1[k] = _mm256_permute4x64_epi64(ymm0[k], 0x72); + ymm0[k] = _mm256_permute4x64_epi64(ymm0[k], 0xD8); + ymm0[k] = _mm256_unpacklo_epi16(ymm0[k], ymm1[k]); + } + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 8; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (k * total_elements)), ymm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 16 bytes. */ +static void +shuffle16_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t j; + int k, l; + __m256i ymm0[16], ymm1[16]; + + /* Create the shuffle mask. + NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from + most to least significant (i.e., their order is reversed when compared to + loading the mask from an array). */ + const __m256i shmask = _mm256_set_epi8( + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00, + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00); + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Fetch 32 elements (512 bytes) into 16 YMM registers. */ + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_loadu_si256((__m256i*)(src + (j * bytesoftype) + (k * sizeof(__m256i)))); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + ymm1[k*2] = _mm256_unpacklo_epi8(ymm0[l], ymm0[l+1]); + ymm1[k*2+1] = _mm256_unpackhi_epi8(ymm0[l], ymm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + ymm0[k*2] = _mm256_unpacklo_epi16(ymm1[l], ymm1[l+2]); + ymm0[k*2+1] = _mm256_unpackhi_epi16(ymm1[l], ymm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + ymm1[k*2] = _mm256_unpacklo_epi32(ymm0[l], ymm0[l+4]); + ymm1[k*2+1] = _mm256_unpackhi_epi32(ymm0[l], ymm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + ymm0[k*2] = _mm256_unpacklo_epi64(ymm1[k], ymm1[k+8]); + ymm0[k*2+1] = _mm256_unpackhi_epi64(ymm1[k], ymm1[k+8]); + } + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_permute4x64_epi64(ymm0[k], 0xd8); + ymm0[k] = _mm256_shuffle_epi8(ymm0[k], shmask); + } + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (k * total_elements)), ymm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size larger than 16 bytes. 
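+   (The type is processed in 16-byte tiles; e.g., for a 24-byte type the
+   passes run at in-type offsets 0 and 8, so bytes 8..15 are shuffled twice,
+   harmlessly, and every pass after the first spans whole 16-byte vectors.)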
*/ +static void +shuffle16_tiled_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t j; + int k, l; + __m256i ymm0[16], ymm1[16]; + + const lldiv_t vecs_per_el = lldiv(bytesoftype, sizeof(__m128i)); + + /* Create the shuffle mask. + NOTE: The XMM/YMM 'set' intrinsics require the arguments to be ordered from + most to least significant (i.e., their order is reversed when compared to + loading the mask from an array). */ + const __m256i shmask = _mm256_set_epi8( + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00, + 0x0f, 0x07, 0x0e, 0x06, 0x0d, 0x05, 0x0c, 0x04, + 0x0b, 0x03, 0x0a, 0x02, 0x09, 0x01, 0x08, 0x00); + + for (j = 0; j < vectorizable_elements; j += sizeof(__m256i)) { + /* Advance the offset into the type by the vector size (in bytes), unless this is + the initial iteration and the type size is not a multiple of the vector size. + In that case, only advance by the number of bytes necessary so that the number + of remaining bytes in the type will be a multiple of the vector size. */ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el.rem > 0 ? vecs_per_el.rem : sizeof(__m128i))) { + + /* Fetch elements in groups of 512 bytes */ + const uint8_t* const src_with_offset = src + offset_into_type; + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_loadu2_m128i( + (__m128i*)(src_with_offset + (j + (2 * k) + 1) * bytesoftype), + (__m128i*)(src_with_offset + (j + (2 * k)) * bytesoftype)); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + ymm1[k*2] = _mm256_unpacklo_epi8(ymm0[l], ymm0[l+1]); + ymm1[k*2+1] = _mm256_unpackhi_epi8(ymm0[l], ymm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + ymm0[k*2] = _mm256_unpacklo_epi16(ymm1[l], ymm1[l+2]); + ymm0[k*2+1] = _mm256_unpackhi_epi16(ymm1[l], ymm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + ymm1[k*2] = _mm256_unpacklo_epi32(ymm0[l], ymm0[l+4]); + ymm1[k*2+1] = _mm256_unpackhi_epi32(ymm0[l], ymm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + ymm0[k*2] = _mm256_unpacklo_epi64(ymm1[k], ymm1[k+8]); + ymm0[k*2+1] = _mm256_unpackhi_epi64(ymm1[k], ymm1[k+8]); + } + for (k = 0; k < 16; k++) { + ymm0[k] = _mm256_permute4x64_epi64(ymm0[k], 0xd8); + ymm0[k] = _mm256_shuffle_epi8(ymm0[k], shmask); + } + /* Store the result vectors */ + uint8_t* const dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm256_storeu_si256((__m256i*)(dest_for_jth_element + (total_elements * (offset_into_type + k))), ymm0[k]); + } + } + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 2 bytes. */ +static void +unshuffle2_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 2; + size_t i; + int j; + __m256i ymm0[2], ymm1[2]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Load 32 elements (64 bytes) into 2 YMM registers. 
*/ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 2; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 2; j++) { + ymm0[j] = _mm256_permute4x64_epi64(ymm0[j], 0xd8); + } + /* Compute the low 64 bytes */ + ymm1[0] = _mm256_unpacklo_epi8(ymm0[0], ymm0[1]); + /* Compute the hi 64 bytes */ + ymm1[1] = _mm256_unpackhi_epi8(ymm0[0], ymm0[1]); + /* Store the result vectors in proper order */ + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (0 * sizeof(__m256i))), ymm1[0]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (1 * sizeof(__m256i))), ymm1[1]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 4 bytes. */ +static void +unshuffle4_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + __m256i ymm0[4], ymm1[4]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Load 32 elements (128 bytes) into 4 YMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 4; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 2; j++) { + /* Compute the low 64 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 64 bytes */ + ymm1[2+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 2; j++) { + /* Compute the low 64 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 64 bytes */ + ymm0[2+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + ymm1[0] = _mm256_permute2x128_si256(ymm0[0], ymm0[2], 0x20); + ymm1[1] = _mm256_permute2x128_si256(ymm0[1], ymm0[3], 0x20); + ymm1[2] = _mm256_permute2x128_si256(ymm0[0], ymm0[2], 0x31); + ymm1[3] = _mm256_permute2x128_si256(ymm0[1], ymm0[3], 0x31); + + /* Store the result vectors in proper order */ + for (j = 0; j < 4; j++) { + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (j * sizeof(__m256i))), ymm1[j]); + } + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 8 bytes. */ +static void +unshuffle8_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t i; + int j; + __m256i ymm0[8], ymm1[8]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Fetch 32 elements (256 bytes) into 8 YMM registers. 
*/ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 8; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[4+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle words */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[4+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + for (j = 0; j < 8; j++) { + ymm0[j] = _mm256_permute4x64_epi64(ymm0[j], 0xd8); + } + + /* Shuffle 4-byte dwords */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[4+j] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + + /* Store the result vectors in proper order */ + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (0 * sizeof(__m256i))), ymm1[0]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (1 * sizeof(__m256i))), ymm1[2]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (2 * sizeof(__m256i))), ymm1[1]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (3 * sizeof(__m256i))), ymm1[3]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (4 * sizeof(__m256i))), ymm1[4]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (5 * sizeof(__m256i))), ymm1[6]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (6 * sizeof(__m256i))), ymm1[5]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (7 * sizeof(__m256i))), ymm1[7]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 16 bytes. */ +static void +unshuffle16_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t i; + int j; + __m256i ymm0[16], ymm1[16]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Fetch 32 elements (512 bytes) into 16 YMM registers. 
*/ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 16; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (j * total_elements))); + } + + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi64(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi64(ymm1[j*2], ymm1[j*2+1]); + } + + for (j = 0; j < 8; j++) { + ymm1[j] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x20); + ymm1[j+8] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x31); + } + + /* Store the result vectors in proper order */ + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (0 * sizeof(__m256i))), ymm1[0]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (1 * sizeof(__m256i))), ymm1[4]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (2 * sizeof(__m256i))), ymm1[2]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (3 * sizeof(__m256i))), ymm1[6]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (4 * sizeof(__m256i))), ymm1[1]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (5 * sizeof(__m256i))), ymm1[5]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (6 * sizeof(__m256i))), ymm1[3]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (7 * sizeof(__m256i))), ymm1[7]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (8 * sizeof(__m256i))), ymm1[8]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (9 * sizeof(__m256i))), ymm1[12]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (10 * sizeof(__m256i))), ymm1[10]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (11 * sizeof(__m256i))), ymm1[14]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (12 * sizeof(__m256i))), ymm1[9]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (13 * sizeof(__m256i))), ymm1[13]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (14 * sizeof(__m256i))), ymm1[11]); + _mm256_storeu_si256((__m256i*)(dest + (i * bytesoftype) + (15 * sizeof(__m256i))), ymm1[15]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size larger than 16 bytes. */ +static void +unshuffle16_tiled_avx2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t i; + int j; + __m256i ymm0[16], ymm1[16]; + + const lldiv_t vecs_per_el = lldiv(bytesoftype, sizeof(__m128i)); + + /* The unshuffle loops are inverted (compared to shuffle_tiled16_avx2) + to optimize cache utilization. 
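+     (One plausible reading: with the in-type offset loop outermost, each
+     pass reads from just 16 of the shuffled byte streams at a time, keeping
+     the source accesses within a narrow band.)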
*/ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el.rem > 0 ? vecs_per_el.rem : sizeof(__m128i))) { + for (i = 0; i < vectorizable_elements; i += sizeof(__m256i)) { + /* Load the first 16 bytes of 32 adjacent elements (512 bytes) into 16 YMM registers */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 16; j++) { + ymm0[j] = _mm256_loadu_si256((__m256i*)(src_for_ith_element + (total_elements * (offset_into_type + j)))); + } + + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi8(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi8(ymm0[j*2], ymm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi16(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi16(ymm1[j*2], ymm1[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm1[j] = _mm256_unpacklo_epi32(ymm0[j*2], ymm0[j*2+1]); + /* Compute the hi 32 bytes */ + ymm1[8+j] = _mm256_unpackhi_epi32(ymm0[j*2], ymm0[j*2+1]); + } + + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + ymm0[j] = _mm256_unpacklo_epi64(ymm1[j*2], ymm1[j*2+1]); + /* Compute the hi 32 bytes */ + ymm0[8+j] = _mm256_unpackhi_epi64(ymm1[j*2], ymm1[j*2+1]); + } + + for (j = 0; j < 8; j++) { + ymm1[j] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x20); + ymm1[j+8] = _mm256_permute2x128_si256(ymm0[j], ymm0[j+8], 0x31); + } + + /* Store the result vectors in proper order */ + const uint8_t* const dest_with_offset = dest + offset_into_type; + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x01) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x00) * bytesoftype), ymm1[0]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x03) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x02) * bytesoftype), ymm1[4]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x05) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x04) * bytesoftype), ymm1[2]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x07) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x06) * bytesoftype), ymm1[6]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x09) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x08) * bytesoftype), ymm1[1]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x0b) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x0a) * bytesoftype), ymm1[5]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x0d) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x0c) * bytesoftype), ymm1[3]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x0f) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x0e) * bytesoftype), ymm1[7]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x11) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x10) * bytesoftype), ymm1[8]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x13) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x12) * bytesoftype), ymm1[12]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x15) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x14) * bytesoftype), ymm1[10]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 
0x17) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x16) * bytesoftype), ymm1[14]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x19) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x18) * bytesoftype), ymm1[9]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x1b) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x1a) * bytesoftype), ymm1[13]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x1d) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x1c) * bytesoftype), ymm1[11]); + _mm256_storeu2_m128i( + (__m128i*)(dest_with_offset + (i + 0x1f) * bytesoftype), + (__m128i*)(dest_with_offset + (i + 0x1e) * bytesoftype), ymm1[15]); + } + } +} + +/* Shuffle a block. This can never fail. */ +void +blosc_internal_shuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m256i); + + /* If the block size is too small to be vectorized, + use the generic implementation. */ + if (blocksize < vectorized_chunk_size) { + blosc_internal_shuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. The vectorized shuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion. */ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + /* Optimized shuffle implementations */ + switch (bytesoftype) + { + case 2: + shuffle2_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + shuffle4_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + shuffle8_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + shuffle16_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + /* For types larger than 16 bytes, use the AVX2 tiled shuffle. */ + if (bytesoftype > sizeof(__m128i)) { + shuffle16_tiled_avx2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized shuffle */ + blosc_internal_shuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. */ + if (vectorizable_bytes < blocksize) { + shuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} + +/* Unshuffle a block. This can never fail. */ +void +blosc_internal_unshuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m256i); + + /* If the block size is too small to be vectorized, + use the generic implementation. */ + if (blocksize < vectorized_chunk_size) { + blosc_internal_unshuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. 
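+ (A hedged numeric example with assumed values: bytesoftype == 4 and blocksize == 1000 give vectorized_chunk_size == 4 * 32 == 128, hence vectorizable_bytes == 896 and a 104-byte tail for the generic code.)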
The vectorized unshuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion. */ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + /* Optimized unshuffle implementations */ + switch (bytesoftype) + { + case 2: + unshuffle2_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + unshuffle4_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + unshuffle8_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + unshuffle16_avx2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + /* For types larger than 16 bytes, use the AVX2 tiled unshuffle. */ + if (bytesoftype > sizeof(__m128i)) { + unshuffle16_tiled_avx2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized unshuffle */ + blosc_internal_unshuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. */ + if (vectorizable_bytes < blocksize) { + unshuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} + +#endif /* !defined(__AVX2__) */ diff --git a/c-blosc/blosc/shuffle-avx2.h b/c-blosc/blosc/shuffle-avx2.h new file mode 100644 index 0000000..9cdd4ff --- /dev/null +++ b/c-blosc/blosc/shuffle-avx2.h @@ -0,0 +1,36 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* AVX2-accelerated shuffle/unshuffle routines. */ + +#ifndef SHUFFLE_AVX2_H +#define SHUFFLE_AVX2_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + AVX2-accelerated shuffle routine. +*/ +BLOSC_NO_EXPORT void blosc_internal_shuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +/** + AVX2-accelerated unshuffle routine. +*/ +BLOSC_NO_EXPORT void blosc_internal_unshuffle_avx2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_AVX2_H */ diff --git a/c-blosc/blosc/shuffle-generic.c b/c-blosc/blosc/shuffle-generic.c new file mode 100644 index 0000000..6b0dd04 --- /dev/null +++ b/c-blosc/blosc/shuffle-generic.c @@ -0,0 +1,25 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "shuffle-generic.h" + +/* Shuffle a block. This can never fail. */ +void blosc_internal_shuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + /* Non-optimized shuffle */ + shuffle_generic_inline(bytesoftype, 0, blocksize, _src, _dest); +} + +/* Unshuffle a block. This can never fail. 
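+ Unshuffling inverts shuffle_generic_inline: for every element i and byte j it performs _dest[i*type_size+j] = _src[j*neblock_quot+i], so unshuffle(shuffle(buf)) reproduces buf byte for byte.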
*/ +void blosc_internal_unshuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + /* Non-optimized unshuffle */ + unshuffle_generic_inline(bytesoftype, 0, blocksize, _src, _dest); +} diff --git a/c-blosc/blosc/shuffle-generic.h b/c-blosc/blosc/shuffle-generic.h new file mode 100644 index 0000000..2504d77 --- /dev/null +++ b/c-blosc/blosc/shuffle-generic.h @@ -0,0 +1,99 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* Generic (non-hardware-accelerated) shuffle/unshuffle routines. + These are used when hardware-accelerated functions aren't available + for a particular platform; they are also used by the hardware- + accelerated functions to handle any remaining elements in a block + which isn't a multiple of the hardware's vector size. */ + +#ifndef SHUFFLE_GENERIC_H +#define SHUFFLE_GENERIC_H + +#include "blosc-common.h" +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + Generic (non-hardware-accelerated) shuffle routine. + This is the pure element-copying nested loop. It is used by the + generic shuffle implementation and also by the vectorized shuffle + implementations to process any remaining elements in a block which + is not a multiple of (type_size * vector_size). +*/ +static void shuffle_generic_inline(const size_t type_size, + const size_t vectorizable_blocksize, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + size_t i, j; + /* Calculate the number of elements in the block. */ + const size_t neblock_quot = blocksize / type_size; + const size_t neblock_rem = blocksize % type_size; + const size_t vectorizable_elements = vectorizable_blocksize / type_size; + + + /* Non-optimized shuffle */ + for (j = 0; j < type_size; j++) { + for (i = vectorizable_elements; i < (size_t)neblock_quot; i++) { + _dest[j*neblock_quot+i] = _src[i*type_size+j]; + } + } + + /* Copy any leftover bytes in the block without shuffling them. */ + memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem), neblock_rem); +} + +/** + Generic (non-hardware-accelerated) unshuffle routine. + This is the pure element-copying nested loop. It is used by the + generic unshuffle implementation and also by the vectorized unshuffle + implementations to process any remaining elements in a block which + is not a multiple of (type_size * vector_size). +*/ +static void unshuffle_generic_inline(const size_t type_size, + const size_t vectorizable_blocksize, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) +{ + size_t i, j; + + /* Calculate the number of elements in the block. */ + const size_t neblock_quot = blocksize / type_size; + const size_t neblock_rem = blocksize % type_size; + const size_t vectorizable_elements = vectorizable_blocksize / type_size; + + /* Non-optimized unshuffle */ + for (i = vectorizable_elements; i < (size_t)neblock_quot; i++) { + for (j = 0; j < type_size; j++) { + _dest[i*type_size+j] = _src[j*neblock_quot+i]; + } + } + + /* Copy any leftover bytes in the block without unshuffling them. */ + memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem), neblock_rem); +} + +/** + Generic (non-hardware-accelerated) shuffle routine.
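+ This is a thin wrapper that calls shuffle_generic_inline with a vectorizable_blocksize of 0, i.e. it shuffles the whole block in scalar code.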
+*/ +BLOSC_NO_EXPORT void blosc_internal_shuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +/** + Generic (non-hardware-accelerated) unshuffle routine. +*/ +BLOSC_NO_EXPORT void blosc_internal_unshuffle_generic(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_GENERIC_H */ diff --git a/c-blosc/blosc/shuffle-sse2.c b/c-blosc/blosc/shuffle-sse2.c new file mode 100644 index 0000000..286f9d1 --- /dev/null +++ b/c-blosc/blosc/shuffle-sse2.c @@ -0,0 +1,640 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "shuffle-generic.h" +#include "shuffle-sse2.h" + +/* Define dummy functions if SSE2 is not available for the compilation target and compiler. */ +#if !defined(__SSE2__) + +void +blosc_internal_shuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + abort(); +} + +void +blosc_internal_unshuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + abort(); +} + +#else /* defined(__SSE2__) */ + +#include <emmintrin.h> + + +/* The next is useful for debugging purposes */ +#if 0 +#include <stdio.h> +#include <string.h> + +static void printxmm(__m128i xmm0) +{ + uint8_t buf[16]; + + ((__m128i *)buf)[0] = xmm0; + printf("%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x,%x\n", + buf[0], buf[1], buf[2], buf[3], + buf[4], buf[5], buf[6], buf[7], + buf[8], buf[9], buf[10], buf[11], + buf[12], buf[13], buf[14], buf[15]); +} +#endif + + +/* Routine optimized for shuffling a buffer for a type size of 2 bytes. */ +static void +shuffle2_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 2; + size_t j; + int k; + uint8_t* dest_for_jth_element; + __m128i xmm0[2], xmm1[2]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Fetch 16 elements (32 bytes) then transpose bytes, words and double words. */ + for (k = 0; k < 2; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src + (j * bytesoftype) + (k * sizeof(__m128i)))); + xmm0[k] = _mm_shufflelo_epi16(xmm0[k], 0xd8); + xmm0[k] = _mm_shufflehi_epi16(xmm0[k], 0xd8); + xmm0[k] = _mm_shuffle_epi32(xmm0[k], 0xd8); + xmm1[k] = _mm_shuffle_epi32(xmm0[k], 0x4e); + xmm0[k] = _mm_unpacklo_epi8(xmm0[k], xmm1[k]); + xmm0[k] = _mm_shuffle_epi32(xmm0[k], 0xd8); + xmm1[k] = _mm_shuffle_epi32(xmm0[k], 0x4e); + xmm0[k] = _mm_unpacklo_epi16(xmm0[k], xmm1[k]); + xmm0[k] = _mm_shuffle_epi32(xmm0[k], 0xd8); + } + /* Transpose quad words */ + for (k = 0; k < 1; k++) { + xmm1[k*2] = _mm_unpacklo_epi64(xmm0[k], xmm0[k+1]); + xmm1[k*2+1] = _mm_unpackhi_epi64(xmm0[k], xmm0[k+1]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 2; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (k * total_elements)), xmm1[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 4 bytes.
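+ After this routine runs, byte k of every 4-byte element sits contiguously in plane k of the output (all low bytes first, then the next byte, and so on), the layout that typically compresses best.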
*/ +static void +shuffle4_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + uint8_t* dest_for_ith_element; + __m128i xmm0[4], xmm1[4]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Fetch 16 elements (64 bytes) then transpose bytes and words. */ + for (j = 0; j < 4; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src + (i * bytesoftype) + (j * sizeof(__m128i)))); + xmm1[j] = _mm_shuffle_epi32(xmm0[j], 0xd8); + xmm0[j] = _mm_shuffle_epi32(xmm0[j], 0x8d); + xmm0[j] = _mm_unpacklo_epi8(xmm1[j], xmm0[j]); + xmm1[j] = _mm_shuffle_epi32(xmm0[j], 0x04e); + xmm0[j] = _mm_unpacklo_epi16(xmm0[j], xmm1[j]); + } + /* Transpose double words */ + for (j = 0; j < 2; j++) { + xmm1[j*2] = _mm_unpacklo_epi32(xmm0[j*2], xmm0[j*2+1]); + xmm1[j*2+1] = _mm_unpackhi_epi32(xmm0[j*2], xmm0[j*2+1]); + } + /* Transpose quad words */ + for (j = 0; j < 2; j++) { + xmm0[j*2] = _mm_unpacklo_epi64(xmm1[j], xmm1[j+2]); + xmm0[j*2+1] = _mm_unpackhi_epi64(xmm1[j], xmm1[j+2]); + } + /* Store the result vectors */ + dest_for_ith_element = dest + i; + for (j = 0; j < 4; j++) { + _mm_storeu_si128((__m128i*)(dest_for_ith_element + (j * total_elements)), xmm0[j]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 8 bytes. */ +static void +shuffle8_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t j; + int k, l; + uint8_t* dest_for_jth_element; + __m128i xmm0[8], xmm1[8]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Fetch 16 elements (128 bytes) then transpose bytes. */ + for (k = 0; k < 8; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src + (j * bytesoftype) + (k * sizeof(__m128i)))); + xmm1[k] = _mm_shuffle_epi32(xmm0[k], 0x4e); + xmm1[k] = _mm_unpacklo_epi8(xmm0[k], xmm1[k]); + } + /* Transpose words */ + for (k = 0, l = 0; k < 4; k++, l +=2) { + xmm0[k*2] = _mm_unpacklo_epi16(xmm1[l], xmm1[l+1]); + xmm0[k*2+1] = _mm_unpackhi_epi16(xmm1[l], xmm1[l+1]); + } + /* Transpose double words */ + for (k = 0, l = 0; k < 4; k++, l++) { + if (k == 2) l += 2; + xmm1[k*2] = _mm_unpacklo_epi32(xmm0[l], xmm0[l+2]); + xmm1[k*2+1] = _mm_unpackhi_epi32(xmm0[l], xmm0[l+2]); + } + /* Transpose quad words */ + for (k = 0; k < 4; k++) { + xmm0[k*2] = _mm_unpacklo_epi64(xmm1[k], xmm1[k+4]); + xmm0[k*2+1] = _mm_unpackhi_epi64(xmm1[k], xmm1[k+4]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 8; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (k * total_elements)), xmm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size of 16 bytes. */ +static void +shuffle16_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t j; + int k, l; + uint8_t* dest_for_jth_element; + __m128i xmm0[16], xmm1[16]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Fetch 16 elements (256 bytes). 
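+ Conceptually the body below is a 16x16 byte-matrix transpose performed in log2(16) == 4 unpack stages (bytes, words, double words, quad words), each stage doubling the run of bytes that originate from the same source vector.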
*/ + for (k = 0; k < 16; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src + (j * bytesoftype) + (k * sizeof(__m128i)))); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + xmm1[k*2] = _mm_unpacklo_epi8(xmm0[l], xmm0[l+1]); + xmm1[k*2+1] = _mm_unpackhi_epi8(xmm0[l], xmm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + xmm0[k*2] = _mm_unpacklo_epi16(xmm1[l], xmm1[l+2]); + xmm0[k*2+1] = _mm_unpackhi_epi16(xmm1[l], xmm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + xmm1[k*2] = _mm_unpacklo_epi32(xmm0[l], xmm0[l+4]); + xmm1[k*2+1] = _mm_unpackhi_epi32(xmm0[l], xmm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + xmm0[k*2] = _mm_unpacklo_epi64(xmm1[k], xmm1[k+8]); + xmm0[k*2+1] = _mm_unpackhi_epi64(xmm1[k], xmm1[k+8]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (k * total_elements)), xmm0[k]); + } + } +} + +/* Routine optimized for shuffling a buffer for a type size larger than 16 bytes. */ +static void +shuffle16_tiled_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t j; + const size_t vecs_per_el_rem = bytesoftype % sizeof(__m128i); + int k, l; + uint8_t* dest_for_jth_element; + __m128i xmm0[16], xmm1[16]; + + for (j = 0; j < vectorizable_elements; j += sizeof(__m128i)) { + /* Advance the offset into the type by the vector size (in bytes), unless this is + the initial iteration and the type size is not a multiple of the vector size. + In that case, only advance by the number of bytes necessary so that the number + of remaining bytes in the type will be a multiple of the vector size. */ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el_rem > 0 ? vecs_per_el_rem : sizeof(__m128i))) { + + /* Fetch elements in groups of 256 bytes */ + const uint8_t* const src_with_offset = src + offset_into_type; + for (k = 0; k < 16; k++) { + xmm0[k] = _mm_loadu_si128((__m128i*)(src_with_offset + (j + k) * bytesoftype)); + } + /* Transpose bytes */ + for (k = 0, l = 0; k < 8; k++, l +=2) { + xmm1[k*2] = _mm_unpacklo_epi8(xmm0[l], xmm0[l+1]); + xmm1[k*2+1] = _mm_unpackhi_epi8(xmm0[l], xmm0[l+1]); + } + /* Transpose words */ + for (k = 0, l = -2; k < 8; k++, l++) { + if ((k%2) == 0) l += 2; + xmm0[k*2] = _mm_unpacklo_epi16(xmm1[l], xmm1[l+2]); + xmm0[k*2+1] = _mm_unpackhi_epi16(xmm1[l], xmm1[l+2]); + } + /* Transpose double words */ + for (k = 0, l = -4; k < 8; k++, l++) { + if ((k%4) == 0) l += 4; + xmm1[k*2] = _mm_unpacklo_epi32(xmm0[l], xmm0[l+4]); + xmm1[k*2+1] = _mm_unpackhi_epi32(xmm0[l], xmm0[l+4]); + } + /* Transpose quad words */ + for (k = 0; k < 8; k++) { + xmm0[k*2] = _mm_unpacklo_epi64(xmm1[k], xmm1[k+8]); + xmm0[k*2+1] = _mm_unpackhi_epi64(xmm1[k], xmm1[k+8]); + } + /* Store the result vectors */ + dest_for_jth_element = dest + j; + for (k = 0; k < 16; k++) { + _mm_storeu_si128((__m128i*)(dest_for_jth_element + (total_elements * (offset_into_type + k))), xmm0[k]); + } + } + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 2 bytes. 
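+ For a 2-byte type the shuffled block is simply the plane of low bytes followed by the plane of high bytes, so a single epi8 unpack pass per 16 elements re-interleaves them.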
*/ +static void +unshuffle2_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 2; + size_t i; + int j; + __m128i xmm0[2], xmm1[2]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (32 bytes) into 2 XMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 2; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + /* Compute the low 32 bytes */ + xmm1[0] = _mm_unpacklo_epi8(xmm0[0], xmm0[1]); + /* Compute the hi 32 bytes */ + xmm1[1] = _mm_unpackhi_epi8(xmm0[0], xmm0[1]); + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm1[1]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 4 bytes. */ +static void +unshuffle4_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 4; + size_t i; + int j; + __m128i xmm0[4], xmm1[4]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (64 bytes) into 4 XMM registers. */ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 4; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 2; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi8(xmm0[j*2], xmm0[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[2+j] = _mm_unpackhi_epi8(xmm0[j*2], xmm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 2; j++) { + /* Compute the low 32 bytes */ + xmm0[j] = _mm_unpacklo_epi16(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm0[2+j] = _mm_unpackhi_epi16(xmm1[j*2], xmm1[j*2+1]); + } + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm0[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm0[2]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (2 * sizeof(__m128i))), xmm0[1]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (3 * sizeof(__m128i))), xmm0[3]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 8 bytes. */ +static void +unshuffle8_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 8; + size_t i; + int j; + __m128i xmm0[8], xmm1[8]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (128 bytes) into 8 XMM registers. 
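+ An 8-byte type needs only log2(8) == 3 unpack stages (epi8, epi16, epi32); the permuted store order afterwards (registers 0, 4, 2, 6, 1, 5, 3, 7) undoes the interleaving those unpacks introduce.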
*/ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 8; j++) { + xmm0[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi8(xmm0[j*2], xmm0[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[4+j] = _mm_unpackhi_epi8(xmm0[j*2], xmm0[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + xmm0[j] = _mm_unpacklo_epi16(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm0[4+j] = _mm_unpackhi_epi16(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 4; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi32(xmm0[j*2], xmm0[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[4+j] = _mm_unpackhi_epi32(xmm0[j*2], xmm0[j*2+1]); + } + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm1[4]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (2 * sizeof(__m128i))), xmm1[2]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (3 * sizeof(__m128i))), xmm1[6]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (4 * sizeof(__m128i))), xmm1[1]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (5 * sizeof(__m128i))), xmm1[5]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (6 * sizeof(__m128i))), xmm1[3]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (7 * sizeof(__m128i))), xmm1[7]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size of 16 bytes. */ +static void +unshuffle16_sse2(uint8_t* const dest, const uint8_t* const src, + const size_t vectorizable_elements, const size_t total_elements) +{ + static const size_t bytesoftype = 16; + size_t i; + int j; + __m128i xmm1[16], xmm2[16]; + + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load 16 elements (256 bytes) into 16 XMM registers. 
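+ Four unpack stages follow; note that the final store order (0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15) is exactly the 4-bit bit-reversal permutation of the register indices.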
*/ + const uint8_t* const src_for_ith_element = src + i; + for (j = 0; j < 16; j++) { + xmm1[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (j * total_elements))); + } + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi8(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi8(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi16(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi16(xmm2[j*2], xmm2[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi32(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi32(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi64(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi64(xmm2[j*2], xmm2[j*2+1]); + } + + /* Store the result vectors in proper order */ + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (0 * sizeof(__m128i))), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (1 * sizeof(__m128i))), xmm1[8]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (2 * sizeof(__m128i))), xmm1[4]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (3 * sizeof(__m128i))), xmm1[12]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (4 * sizeof(__m128i))), xmm1[2]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (5 * sizeof(__m128i))), xmm1[10]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (6 * sizeof(__m128i))), xmm1[6]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (7 * sizeof(__m128i))), xmm1[14]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (8 * sizeof(__m128i))), xmm1[1]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (9 * sizeof(__m128i))), xmm1[9]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (10 * sizeof(__m128i))), xmm1[5]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (11 * sizeof(__m128i))), xmm1[13]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (12 * sizeof(__m128i))), xmm1[3]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (13 * sizeof(__m128i))), xmm1[11]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (14 * sizeof(__m128i))), xmm1[7]); + _mm_storeu_si128((__m128i*)(dest + (i * bytesoftype) + (15 * sizeof(__m128i))), xmm1[15]); + } +} + +/* Routine optimized for unshuffling a buffer for a type size larger than 16 bytes. */ +static void +unshuffle16_tiled_sse2(uint8_t* const dest, const uint8_t* const orig, + const size_t vectorizable_elements, const size_t total_elements, const size_t bytesoftype) +{ + size_t i; + const size_t vecs_per_el_rem = bytesoftype % sizeof(__m128i); + + int j; + uint8_t* dest_with_offset; + __m128i xmm1[16], xmm2[16]; + + /* The unshuffle loops are inverted (compared to shuffle_tiled16_sse2) + to optimize cache utilization. */ + size_t offset_into_type; + for (offset_into_type = 0; offset_into_type < bytesoftype; + offset_into_type += (offset_into_type == 0 && vecs_per_el_rem > 0 ? 
vecs_per_el_rem : sizeof(__m128i))) { + for (i = 0; i < vectorizable_elements; i += sizeof(__m128i)) { + /* Load the first 128 bytes in 16 XMM registers */ + const uint8_t* const src_for_ith_element = orig + i; + for (j = 0; j < 16; j++) { + xmm1[j] = _mm_loadu_si128((__m128i*)(src_for_ith_element + (total_elements * (offset_into_type + j)))); + } + /* Shuffle bytes */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi8(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi8(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 2-byte words */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi16(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi16(xmm2[j*2], xmm2[j*2+1]); + } + /* Shuffle 4-byte dwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm2[j] = _mm_unpacklo_epi32(xmm1[j*2], xmm1[j*2+1]); + /* Compute the hi 32 bytes */ + xmm2[8+j] = _mm_unpackhi_epi32(xmm1[j*2], xmm1[j*2+1]); + } + /* Shuffle 8-byte qwords */ + for (j = 0; j < 8; j++) { + /* Compute the low 32 bytes */ + xmm1[j] = _mm_unpacklo_epi64(xmm2[j*2], xmm2[j*2+1]); + /* Compute the hi 32 bytes */ + xmm1[8+j] = _mm_unpackhi_epi64(xmm2[j*2], xmm2[j*2+1]); + } + + /* Store the result vectors in proper order */ + dest_with_offset = dest + offset_into_type; + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 0) * bytesoftype), xmm1[0]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 1) * bytesoftype), xmm1[8]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 2) * bytesoftype), xmm1[4]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 3) * bytesoftype), xmm1[12]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 4) * bytesoftype), xmm1[2]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 5) * bytesoftype), xmm1[10]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 6) * bytesoftype), xmm1[6]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 7) * bytesoftype), xmm1[14]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 8) * bytesoftype), xmm1[1]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 9) * bytesoftype), xmm1[9]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 10) * bytesoftype), xmm1[5]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 11) * bytesoftype), xmm1[13]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 12) * bytesoftype), xmm1[3]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 13) * bytesoftype), xmm1[11]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 14) * bytesoftype), xmm1[7]); + _mm_storeu_si128((__m128i*)(dest_with_offset + (i + 15) * bytesoftype), xmm1[15]); + } + } +} + +/* Shuffle a block. This can never fail. */ +void +blosc_internal_shuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m128i); + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. The vectorized shuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion. 
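+ (Illustrative numbers, assumed rather than taken from the source: bytesoftype == 8 and blocksize == 500 give vectorized_chunk_size == 128, vectorizable_bytes == 384 and a 116-byte tail for shuffle_generic_inline.)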
*/ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + /* If the block size is too small to be vectorized, + use the generic implementation. */ + if (blocksize < vectorized_chunk_size) { + blosc_internal_shuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* Optimized shuffle implementations */ + switch (bytesoftype) + { + case 2: + shuffle2_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + shuffle4_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + shuffle8_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + shuffle16_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + if (bytesoftype > sizeof(__m128i)) { + shuffle16_tiled_sse2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized shuffle */ + blosc_internal_shuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. */ + if (vectorizable_bytes < blocksize) { + shuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} + +/* Unshuffle a block. This can never fail. */ +void +blosc_internal_unshuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest) { + const size_t vectorized_chunk_size = bytesoftype * sizeof(__m128i); + /* If the blocksize is not a multiple of both the typesize and + the vector size, round the blocksize down to the next value + which is a multiple of both. The vectorized unshuffle can be + used for that portion of the data, and the naive implementation + can be used for the remaining portion. */ + const size_t vectorizable_bytes = blocksize - (blocksize % vectorized_chunk_size); + const size_t vectorizable_elements = vectorizable_bytes / bytesoftype; + const size_t total_elements = blocksize / bytesoftype; + + + /* If the block size is too small to be vectorized, + use the generic implementation. */ + if (blocksize < vectorized_chunk_size) { + blosc_internal_unshuffle_generic(bytesoftype, blocksize, _src, _dest); + return; + } + + /* Optimized unshuffle implementations */ + switch (bytesoftype) + { + case 2: + unshuffle2_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 4: + unshuffle4_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 8: + unshuffle8_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + case 16: + unshuffle16_sse2(_dest, _src, vectorizable_elements, total_elements); + break; + default: + if (bytesoftype > sizeof(__m128i)) { + unshuffle16_tiled_sse2(_dest, _src, vectorizable_elements, total_elements, bytesoftype); + } + else { + /* Non-optimized unshuffle */ + blosc_internal_unshuffle_generic(bytesoftype, blocksize, _src, _dest); + /* The non-optimized function covers the whole buffer, + so we're done processing here. */ + return; + } + } + + /* If the buffer had any bytes at the end which couldn't be handled + by the vectorized implementations, use the non-optimized version + to finish them up. 
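+ Because vectorizable_bytes is a multiple of bytesoftype * sizeof(__m128i), the generic tail always starts on an element boundary.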
*/ + if (vectorizable_bytes < blocksize) { + unshuffle_generic_inline(bytesoftype, vectorizable_bytes, blocksize, _src, _dest); + } +} + +#endif /* !defined(__SSE2__) */ diff --git a/c-blosc/blosc/shuffle-sse2.h b/c-blosc/blosc/shuffle-sse2.h new file mode 100644 index 0000000..531952a --- /dev/null +++ b/c-blosc/blosc/shuffle-sse2.h @@ -0,0 +1,36 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* SSE2-accelerated shuffle/unshuffle routines. */ + +#ifndef SHUFFLE_SSE2_H +#define SHUFFLE_SSE2_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + SSE2-accelerated shuffle routine. +*/ +BLOSC_NO_EXPORT void blosc_internal_shuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +/** + SSE2-accelerated unshuffle routine. +*/ +BLOSC_NO_EXPORT void blosc_internal_unshuffle_sse2(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, uint8_t* const _dest); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_SSE2_H */ diff --git a/c-blosc/blosc/shuffle.c b/c-blosc/blosc/shuffle.c new file mode 100644 index 0000000..e680a17 --- /dev/null +++ b/c-blosc/blosc/shuffle.c @@ -0,0 +1,453 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + Creation date: 2009-05-20 + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +#include "shuffle.h" +#include "blosc-common.h" +#include "shuffle-generic.h" +#include "bitshuffle-generic.h" +#include "blosc-comp-features.h" +#include <stdio.h> + +#if defined(_WIN32) +#include "win32/pthread.h" +#else +#include <pthread.h> +#endif + +/* Visual Studio < 2013 does not have stdbool.h so here is a replacement: */ +#if defined __STDC__ && defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +/* have a C99 compiler */ +typedef _Bool bool; +#else +/* do not have a C99 compiler */ +typedef unsigned char bool; +#endif + + +#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) +#define HAVE_CPU_FEAT_INTRIN +#endif + + +/* Include hardware-accelerated shuffle/unshuffle routines based on + the target architecture. Note that a target architecture may support + more than one type of acceleration! */ +#if defined(SHUFFLE_AVX2_ENABLED) + #include "shuffle-avx2.h" + #include "bitshuffle-avx2.h" +#endif /* defined(SHUFFLE_AVX2_ENABLED) */ + +#if defined(SHUFFLE_SSE2_ENABLED) + #include "shuffle-sse2.h" + #include "bitshuffle-sse2.h" +#endif /* defined(SHUFFLE_SSE2_ENABLED) */ + + +/* Define function pointer types for shuffle/unshuffle routines. */ +typedef void(*shuffle_func)(const size_t, const size_t, const uint8_t*, const uint8_t*); +typedef void(*unshuffle_func)(const size_t, const size_t, const uint8_t*, const uint8_t*); +typedef int64_t(*bitshuffle_func)(void*, void*, const size_t, const size_t, void*); +typedef int64_t(*bitunshuffle_func)(void*, void*, const size_t, const size_t, void*); + +/* An implementation of shuffle/unshuffle routines. */ +typedef struct shuffle_implementation { + /* Name of this implementation.
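+ ("generic", "sse2" or "avx2"; see get_shuffle_implementation below.)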
*/ + const char* name; + /* Function pointer to the shuffle routine for this implementation. */ + shuffle_func shuffle; + /* Function pointer to the unshuffle routine for this implementation. */ + unshuffle_func unshuffle; + /* Function pointer to the bitshuffle routine for this implementation. */ + bitshuffle_func bitshuffle; + /* Function pointer to the bitunshuffle routine for this implementation. */ + bitunshuffle_func bitunshuffle; +} shuffle_implementation_t; + +typedef enum { + BLOSC_HAVE_NOTHING = 0, + BLOSC_HAVE_SSE2 = 1, + BLOSC_HAVE_AVX2 = 2 +} blosc_cpu_features; + +/* Detect hardware and set function pointers to the best shuffle/unshuffle + implementations supported by the host processor for Intel/i686 + */ +#if (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86)) \ + && (defined(SHUFFLE_AVX2_ENABLED) || defined(SHUFFLE_SSE2_ENABLED)) + +/* Disabled the __builtin_cpu_supports() call, as it has issues with + new versions of gcc (like 5.3.1 in forthcoming ubuntu/xenial: + "undefined symbol: __cpu_model" + For a similar report, see: + https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/ZM2L65WIZEEQHHLFERZYD5FAG7QY2OGB/ +*/ +#if defined(HAVE_CPU_FEAT_INTRIN) && 0 +static blosc_cpu_features blosc_get_cpu_features(void) { + blosc_cpu_features cpu_features = BLOSC_HAVE_NOTHING; + if (__builtin_cpu_supports("sse2")) { + cpu_features |= BLOSC_HAVE_SSE2; + } + if (__builtin_cpu_supports("avx2")) { + cpu_features |= BLOSC_HAVE_AVX2; + } + return cpu_features; +} +#else + +#if defined(_MSC_VER) && !defined(__clang__) + #include <intrin.h> /* Needed for __cpuid */ + +/* _xgetbv is only supported by VS2010 SP1 and newer versions of VS. */ +#if _MSC_FULL_VER >= 160040219 + #include <immintrin.h> /* Needed for _xgetbv */ + #define blosc_internal_xgetbv _xgetbv +#elif defined(_M_IX86) + +/* Implement _xgetbv for VS2008 and VS2010 RTM with 32-bit (x86) targets. */ + +static uint64_t blosc_internal_xgetbv(uint32_t xcr) { + uint32_t xcr0, xcr1; + __asm { + mov ecx, xcr + _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 + mov xcr0, eax + mov xcr1, edx + } + return ((uint64_t)xcr1 << 32) | xcr0; +} + +#elif defined(_M_X64) + +/* Implement _xgetbv for VS2008 and VS2010 RTM with 64-bit (x64) targets. + These compilers don't support any of the newer acceleration ISAs + (e.g., AVX2) supported by blosc, and all x64 hardware supports SSE2 + which means we can get away with returning a hard-coded value from + this implementation of _xgetbv. */ + +static __inline uint64_t blosc_internal_xgetbv(uint32_t xcr) { + /* A 64-bit OS must have XMM save support. */ + return (xcr == 0 ? (1UL << 1) : 0UL); +} + +#else + +/* Hardware detection for any other MSVC targets (e.g., ARM) + isn't implemented at this time. */ +#error This version of c-blosc only supports x86 and x64 targets with MSVC. + +#endif /* _MSC_FULL_VER >= 160040219 */ + +#define blosc_internal_cpuid __cpuid + +#else + +/* Implement the __cpuid and __cpuidex intrinsics for GCC, Clang, + and others using inline assembly. */ +__attribute__((always_inline)) +static inline void +blosc_internal_cpuidex(int32_t cpuInfo[4], int32_t function_id, int32_t subfunction_id) { + __asm__ __volatile__ ( +# if defined(__i386__) && defined (__PIC__) + /* Can't clobber ebx with PIC running under 32-bit, so it needs to be manually restored.
+ https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family + */ + "movl %%ebx, %%edi\n\t" + "cpuid\n\t" + "xchgl %%ebx, %%edi": + "=D" (cpuInfo[1]), +#else + "cpuid": + "=b" (cpuInfo[1]), +#endif /* defined(__i386) && defined(__PIC__) */ + "=a" (cpuInfo[0]), + "=c" (cpuInfo[2]), + "=d" (cpuInfo[3]) : + "a" (function_id), "c" (subfunction_id) + ); +} + +#define blosc_internal_cpuid(cpuInfo, function_id) blosc_internal_cpuidex(cpuInfo, function_id, 0) + +#define _XCR_XFEATURE_ENABLED_MASK 0 + +/* Reads the content of an extended control register. + https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family +*/ +static inline uint64_t +blosc_internal_xgetbv(uint32_t xcr) { + uint32_t eax, edx; + __asm__ __volatile__ ( + /* "xgetbv" + This is specified as raw instruction bytes due to some older compilers + having issues with the mnemonic form. + */ + ".byte 0x0f, 0x01, 0xd0": + "=a" (eax), + "=d" (edx) : + "c" (xcr) + ); + return ((uint64_t)edx << 32) | eax; +} + +#endif /* defined(_MSC_FULL_VER) */ + +#ifndef _XCR_XFEATURE_ENABLED_MASK +#define _XCR_XFEATURE_ENABLED_MASK 0x0 +#endif + +static blosc_cpu_features blosc_get_cpu_features(void) { + blosc_cpu_features result = BLOSC_HAVE_NOTHING; + int32_t max_basic_function_id; + /* Holds the values of eax, ebx, ecx, edx set by the `cpuid` instruction */ + int32_t cpu_info[4]; + int sse2_available; + int sse3_available; + int ssse3_available; + int sse41_available; + int sse42_available; + int xsave_available; + int xsave_enabled_by_os; + int avx2_available = 0; + int avx512bw_available = 0; + int xmm_state_enabled = 0; + int ymm_state_enabled = 0; + int zmm_state_enabled = 0; + uint64_t xcr0_contents; + char* envvar; + + /* Get the number of basic functions available. */ + blosc_internal_cpuid(cpu_info, 0); + max_basic_function_id = cpu_info[0]; + + /* Check for SSE-based features and required OS support */ + blosc_internal_cpuid(cpu_info, 1); + sse2_available = (cpu_info[3] & (1 << 26)) != 0; + sse3_available = (cpu_info[2] & (1 << 0)) != 0; + ssse3_available = (cpu_info[2] & (1 << 9)) != 0; + sse41_available = (cpu_info[2] & (1 << 19)) != 0; + sse42_available = (cpu_info[2] & (1 << 20)) != 0; + + xsave_available = (cpu_info[2] & (1 << 26)) != 0; + xsave_enabled_by_os = (cpu_info[2] & (1 << 27)) != 0; + + /* Check for AVX-based features, if the processor supports extended features. */ + if (max_basic_function_id >= 7) { + blosc_internal_cpuid(cpu_info, 7); + avx2_available = (cpu_info[1] & (1 << 5)) != 0; + avx512bw_available = (cpu_info[1] & (1 << 30)) != 0; + } + + /* Even if certain features are supported by the CPU, they may not be supported + by the OS (in which case using them would crash the process or system). + If xsave is available and enabled by the OS, check the contents of the + extended control register XCR0 to see if the CPU features are enabled. */ +#if defined(_XCR_XFEATURE_ENABLED_MASK) + if (xsave_available && xsave_enabled_by_os && ( + sse2_available || sse3_available || ssse3_available + || sse41_available || sse42_available + || avx2_available || avx512bw_available)) { + /* Determine which register states can be restored by the OS. 
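+ (In XCR0, bit 1 covers the XMM registers and bit 2 the upper halves of the YMM registers; both must be set before the AVX2 paths are treated as safe.)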
*/ + xcr0_contents = blosc_internal_xgetbv(_XCR_XFEATURE_ENABLED_MASK); + + xmm_state_enabled = (xcr0_contents & (1UL << 1)) != 0; + ymm_state_enabled = (xcr0_contents & (1UL << 2)) != 0; + + /* Require support for both the upper 256-bits of zmm0-zmm15 to be + restored as well as all of zmm16-zmm31 and the opmask registers. */ + zmm_state_enabled = (xcr0_contents & 0x70) == 0x70; + } +#endif /* defined(_XCR_XFEATURE_ENABLED_MASK) */ + + envvar = getenv("BLOSC_PRINT_SHUFFLE_ACCEL"); + if (envvar != NULL) { + printf("Shuffle CPU Information:\n"); + printf("SSE2 available: %s\n", sse2_available ? "True" : "False"); + printf("SSE3 available: %s\n", sse3_available ? "True" : "False"); + printf("SSSE3 available: %s\n", ssse3_available ? "True" : "False"); + printf("SSE4.1 available: %s\n", sse41_available ? "True" : "False"); + printf("SSE4.2 available: %s\n", sse42_available ? "True" : "False"); + printf("AVX2 available: %s\n", avx2_available ? "True" : "False"); + printf("AVX512BW available: %s\n", avx512bw_available ? "True" : "False"); + printf("XSAVE available: %s\n", xsave_available ? "True" : "False"); + printf("XSAVE enabled: %s\n", xsave_enabled_by_os ? "True" : "False"); + printf("XMM state enabled: %s\n", xmm_state_enabled ? "True" : "False"); + printf("YMM state enabled: %s\n", ymm_state_enabled ? "True" : "False"); + printf("ZMM state enabled: %s\n", zmm_state_enabled ? "True" : "False"); + } + + /* Using the gathered CPU information, determine which implementation to use. */ + /* technically could fail on sse2 cpu on os without xmm support, but that + * shouldn't exist anymore */ + if (sse2_available) { + result |= BLOSC_HAVE_SSE2; + } + if (xmm_state_enabled && ymm_state_enabled && avx2_available) { + result |= BLOSC_HAVE_AVX2; + } + return result; +} +#endif + +#else /* No hardware acceleration supported for the target architecture. */ + #if defined(_MSC_VER) + #pragma message("Hardware-acceleration detection not implemented for the target architecture. Only the generic shuffle/unshuffle routines will be available.") + #else + #warning Hardware-acceleration detection not implemented for the target architecture. Only the generic shuffle/unshuffle routines will be available. 
+ #endif + +static blosc_cpu_features blosc_get_cpu_features(void) { + return BLOSC_HAVE_NOTHING; +} + +#endif + +static shuffle_implementation_t get_shuffle_implementation(void) { + blosc_cpu_features cpu_features = blosc_get_cpu_features(); + shuffle_implementation_t impl_generic; + +#if defined(SHUFFLE_AVX2_ENABLED) + if (cpu_features & BLOSC_HAVE_AVX2) { + shuffle_implementation_t impl_avx2; + impl_avx2.name = "avx2"; + impl_avx2.shuffle = (shuffle_func)blosc_internal_shuffle_avx2; + impl_avx2.unshuffle = (unshuffle_func)blosc_internal_unshuffle_avx2; + impl_avx2.bitshuffle = (bitshuffle_func)blosc_internal_bshuf_trans_bit_elem_avx2; + impl_avx2.bitunshuffle = (bitunshuffle_func)blosc_internal_bshuf_untrans_bit_elem_avx2; + return impl_avx2; + } +#endif /* defined(SHUFFLE_AVX2_ENABLED) */ + +#if defined(SHUFFLE_SSE2_ENABLED) + if (cpu_features & BLOSC_HAVE_SSE2) { + shuffle_implementation_t impl_sse2; + impl_sse2.name = "sse2"; + impl_sse2.shuffle = (shuffle_func)blosc_internal_shuffle_sse2; + impl_sse2.unshuffle = (unshuffle_func)blosc_internal_unshuffle_sse2; + impl_sse2.bitshuffle = (bitshuffle_func)blosc_internal_bshuf_trans_bit_elem_sse2; + impl_sse2.bitunshuffle = (bitunshuffle_func)blosc_internal_bshuf_untrans_bit_elem_sse2; + return impl_sse2; + } +#endif /* defined(SHUFFLE_SSE2_ENABLED) */ + + /* Processor doesn't support any of the hardware-accelerated implementations, + so use the generic implementation. */ + impl_generic.name = "generic"; + impl_generic.shuffle = (shuffle_func)blosc_internal_shuffle_generic; + impl_generic.unshuffle = (unshuffle_func)blosc_internal_unshuffle_generic; + impl_generic.bitshuffle = (bitshuffle_func)blosc_internal_bshuf_trans_bit_elem_scal; + impl_generic.bitunshuffle = (bitunshuffle_func)blosc_internal_bshuf_untrans_bit_elem_scal; + return impl_generic; +} + + +/* Flag indicating whether the implementation has been initialized. */ +static pthread_once_t implementation_initialized = PTHREAD_ONCE_INIT; + +/* The dynamically-chosen shuffle/unshuffle implementation. + This is only safe to use once `implementation_initialized` is set. */ +static shuffle_implementation_t host_implementation; + +static void set_host_implementation(void) { + host_implementation = get_shuffle_implementation(); +} + +/* Initialize the shuffle implementation, if necessary. */ +#if defined(__GNUC__) || defined(__clang__) +__attribute__((always_inline)) +#endif +static +#if defined(_MSC_VER) +__forceinline +#else +BLOSC_INLINE +#endif +void init_shuffle_implementation(void) { + pthread_once(&implementation_initialized, &set_host_implementation); +} + +/* Shuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +void +blosc_internal_shuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest) { + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + /* The implementation is initialized. + Dispatch to its shuffle routine. */ + (host_implementation.shuffle)(bytesoftype, blocksize, _src, _dest); +} + +/* Unshuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +void +blosc_internal_unshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest) { + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + /* The implementation is initialized. + Dispatch to its unshuffle routine.
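+ pthread_once() guarantees that set_host_implementation has run to completion, in every thread, before the function pointer is read below.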
*/ + (host_implementation.unshuffle)(bytesoftype, blocksize, _src, _dest); +} + +/* Bit-shuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +int +blosc_internal_bitshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp) { + int size = blocksize / bytesoftype; + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + if ((size % 8) == 0) { + /* The number of elems is a multiple of 8 which is supported by + bitshuffle. */ + int ret = (int)(host_implementation.bitshuffle)((void *) _src, (void *) _dest, + blocksize / bytesoftype, + bytesoftype, (void *) _tmp); + /* Copy the leftovers */ + size_t offset = size * bytesoftype; + memcpy((void *) (_dest + offset), (void *) (_src + offset), blocksize - offset); + return ret; + } + else { + memcpy((void *) _dest, (void *) _src, blocksize); + } + return size; +} + +/* Bit-unshuffle a block by dynamically dispatching to the appropriate + hardware-accelerated routine at run-time. */ +int +blosc_internal_bitunshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp) { + int size = blocksize / bytesoftype; + /* Initialize the shuffle implementation if necessary. */ + init_shuffle_implementation(); + + if ((size % 8) == 0) { + /* The number of elems is a multiple of 8 which is supported by + bitshuffle. */ + int ret = (int) (host_implementation.bitunshuffle)((void *) _src, (void *) _dest, + blocksize / bytesoftype, + bytesoftype, (void *) _tmp); + /* Copy the leftovers */ + size_t offset = size * bytesoftype; + memcpy((void *) (_dest + offset), (void *) (_src + offset), blocksize - offset); + return ret; + } + else { + memcpy((void *) _dest, (void *) _src, blocksize); + } + return size; +} diff --git a/c-blosc/blosc/shuffle.h b/c-blosc/blosc/shuffle.h new file mode 100644 index 0000000..8f7fd0c --- /dev/null +++ b/c-blosc/blosc/shuffle.h @@ -0,0 +1,67 @@ +/********************************************************************* + Blosc - Blocked Shuffling and Compression Library + + Author: Francesc Alted + + See LICENSE.txt for details about copyright and rights to use. +**********************************************************************/ + +/* Shuffle/unshuffle routines which dynamically dispatch to hardware- + accelerated routines based on the processor's architecture. + Consumers should almost always prefer to call these routines instead + of directly calling one of the hardware-accelerated routines, since + these are cross-platform and future-proof. */ + +#ifndef SHUFFLE_H +#define SHUFFLE_H + +#include "blosc-common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + Primary shuffle and bitshuffle routines. + This function dynamically dispatches to the appropriate hardware-accelerated + routine based on the host processor's architecture. If the host processor is + not supported by any of the hardware-accelerated routines, the generic + (non-accelerated) implementation is used instead. + Consumers should almost always prefer to call this routine instead of directly + calling the hardware-accelerated routines because this method is both cross- + platform and future-proof. 
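+ A minimal calling sketch (the typesize of 4 and the buffer size are assumed example values, not taken from this header): + uint8_t src[1024], dst[1024]; + blosc_internal_shuffle(4, sizeof(src), src, dst); + blosc_internal_unshuffle(4, sizeof(dst), dst, src); + round-trips the buffer byte for byte.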
+*/ +BLOSC_NO_EXPORT void +blosc_internal_shuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest); + +BLOSC_NO_EXPORT int +blosc_internal_bitshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp); + +/** + Primary unshuffle and bitunshuffle routine. + This function dynamically dispatches to the appropriate hardware-accelerated + routine based on the host processor's architecture. If the host processor is + not supported by any of the hardware-accelerated routines, the generic + (non-accelerated) implementation is used instead. + Consumers should almost always prefer to call this routine instead of directly + calling the hardware-accelerated routines because this method is both cross- + platform and future-proof. +*/ +BLOSC_NO_EXPORT void +blosc_internal_unshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* _src, const uint8_t* _dest); + + +BLOSC_NO_EXPORT int +blosc_internal_bitunshuffle(const size_t bytesoftype, const size_t blocksize, + const uint8_t* const _src, const uint8_t* _dest, + const uint8_t* _tmp); + +#ifdef __cplusplus +} +#endif + +#endif /* SHUFFLE_H */ diff --git a/c-blosc/blosc/win32/pthread.c b/c-blosc/blosc/win32/pthread.c new file mode 100644 index 0000000..23eb8f0 --- /dev/null +++ b/c-blosc/blosc/win32/pthread.c @@ -0,0 +1,218 @@ +/* + * Code for simulating pthreads API on Windows. This is Git-specific, + * but it is enough for Numexpr needs too. + * + * Copyright (C) 2009 Andrzej K. Haczewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * DISCLAIMER: The implementation is Git-specific, it is subset of original + * Pthreads API, without lots of other features that Git doesn't use. + * Git also makes sure that the passed arguments are valid, so there's + * no need for double-checking. + */ + +#include "pthread.h" + +#include <errno.h> +#include <limits.h> +#include <process.h> +#include <stdio.h> +#include <stdlib.h> + + +void die(const char *err, ...)
+{ + printf("%s", err); + exit(-1); +} + +static unsigned __stdcall win32_start_routine(void *arg) +{ + pthread_t *thread = (pthread_t*)arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int pthread_create(pthread_t *thread, const void *unused, + void *(*start_routine)(void*), void *arg) +{ + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) + _beginthreadex(NULL, 0, win32_start_routine, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int win32_pthread_join(pthread_t *thread, void **value_ptr) +{ + DWORD result = WaitForSingleObject(thread->handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) + *value_ptr = thread->arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +int pthread_cond_init(pthread_cond_t *cond, const void *unused) +{ + cond->waiters = 0; + cond->was_broadcast = 0; + InitializeCriticalSection(&cond->waiters_lock); + + cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); + if (!cond->sema) + die("CreateSemaphore() failed"); + + cond->continue_broadcast = CreateEvent(NULL, /* security */ + FALSE, /* auto-reset */ + FALSE, /* not signaled */ + NULL); /* name */ + if (!cond->continue_broadcast) + die("CreateEvent() failed"); + + return 0; +} + +int pthread_cond_destroy(pthread_cond_t *cond) +{ + CloseHandle(cond->sema); + CloseHandle(cond->continue_broadcast); + DeleteCriticalSection(&cond->waiters_lock); + return 0; +} + +int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex) +{ + int last_waiter; + + EnterCriticalSection(&cond->waiters_lock); + cond->waiters++; + LeaveCriticalSection(&cond->waiters_lock); + + /* + * Unlock external mutex and wait for signal. + * NOTE: we've held mutex locked long enough to increment + * waiters count above, so there's no problem with + * leaving mutex unlocked before we wait on semaphore. + */ + LeaveCriticalSection(mutex); + + /* let's wait - ignore return value */ + WaitForSingleObject(cond->sema, INFINITE); + + /* + * Decrease waiters count. If we are the last waiter, then we must + * notify the broadcasting thread that it can continue. + * But if we continued due to cond_signal, we do not have to do that + * because the signaling thread knows that only one waiter continued. + */ + EnterCriticalSection(&cond->waiters_lock); + cond->waiters--; + last_waiter = cond->was_broadcast && cond->waiters == 0; + LeaveCriticalSection(&cond->waiters_lock); + + if (last_waiter) { + /* + * cond_broadcast was issued while mutex was held. This means + * that all other waiters have continued, but are contending + * for the mutex at the end of this function because the + * broadcasting thread did not leave cond_broadcast, yet. + * (This is so that it can be sure that each waiter has + * consumed exactly one slice of the semaphore.) + * The last waiter must tell the broadcasting thread that it + * can go on. + */ + SetEvent(cond->continue_broadcast); + /* + * Now we go on to contend with all other waiters for + * the mutex. Auf in den Kampf! + */ + } + /* lock external mutex again */ + EnterCriticalSection(mutex); + + return 0; +} + +/* + * IMPORTANT: This implementation requires that pthread_cond_signal + * is called while the mutex is held that is used in the corresponding + * pthread_cond_wait calls! 
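+ *
+ * Illustrative caller pattern (not in the original file; mu, cond and
+ * ready are hypothetical) that satisfies this contract:
+ *
+ *   pthread_mutex_lock(&mu);
+ *   ready = 1;
+ *   pthread_cond_signal(&cond);   // mutex still held, as required
+ *   pthread_mutex_unlock(&mu);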
+ */ +int pthread_cond_signal(pthread_cond_t *cond) +{ + int have_waiters; + + EnterCriticalSection(&cond->waiters_lock); + have_waiters = cond->waiters > 0; + LeaveCriticalSection(&cond->waiters_lock); + + /* + * Signal only when there are waiters + */ + if (have_waiters) + return ReleaseSemaphore(cond->sema, 1, NULL) ? + 0 : GetLastError(); + else + return 0; +} + +/* + * DOUBLY IMPORTANT: This implementation requires that pthread_cond_broadcast + * is called while the mutex is held that is used in the corresponding + * pthread_cond_wait calls! + */ +int pthread_cond_broadcast(pthread_cond_t *cond) +{ + EnterCriticalSection(&cond->waiters_lock); + + if ((cond->was_broadcast = cond->waiters > 0)) { + /* wake up all waiters */ + ReleaseSemaphore(cond->sema, cond->waiters, NULL); + LeaveCriticalSection(&cond->waiters_lock); + /* + * At this point all waiters continue. Each one takes its + * slice of the semaphore. Now it's our turn to wait: Since + * the external mutex is held, no thread can leave cond_wait, + * yet. For this reason, we can be sure that no thread gets + * a chance to eat *more* than one slice. OTOH, it means + * that the last waiter must send us a wake-up. + */ + WaitForSingleObject(cond->continue_broadcast, INFINITE); + /* + * Since the external mutex is held, no thread can enter + * cond_wait, and, hence, it is safe to reset this flag + * without cond->waiters_lock held. + */ + cond->was_broadcast = 0; + } else { + LeaveCriticalSection(&cond->waiters_lock); + } + return 0; +} diff --git a/c-blosc/blosc/win32/pthread.h b/c-blosc/blosc/win32/pthread.h new file mode 100644 index 0000000..c119943 --- /dev/null +++ b/c-blosc/blosc/win32/pthread.h @@ -0,0 +1,112 @@ +/* + * Code for simulating pthreads API on Windows. This is Git-specific, + * but it is enough for Numexpr needs too. + * + * Copyright (C) 2009 Andrzej K. Haczewski + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * DISCLAIMER: The implementation is Git-specific, it is subset of original + * Pthreads API, without lots of other features that Git doesn't use. + * Git also makes sure that the passed arguments are valid, so there's + * no need for double-checking. 
+ */ + +#ifndef PTHREAD_H +#define PTHREAD_H + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#include + +/* + * Defines that adapt Windows API threads to pthreads API + */ +#define pthread_mutex_t CRITICAL_SECTION + +#define pthread_mutex_init(a,b) InitializeCriticalSection((a)) +#define pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define pthread_mutex_lock EnterCriticalSection +#define pthread_mutex_unlock LeaveCriticalSection + +/* + * Implement simple condition variable for Windows threads, based on ACE + * implementation. + * + * See original implementation: http://bit.ly/1vkDjo + * ACE homepage: http://www.cse.wustl.edu/~schmidt/ACE.html + * See also: http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +typedef struct { + LONG waiters; + int was_broadcast; + CRITICAL_SECTION waiters_lock; + HANDLE sema; + HANDLE continue_broadcast; +} pthread_cond_t; + +extern int pthread_cond_init(pthread_cond_t *cond, const void *unused); +extern int pthread_cond_destroy(pthread_cond_t *cond); +extern int pthread_cond_wait(pthread_cond_t *cond, CRITICAL_SECTION *mutex); +extern int pthread_cond_signal(pthread_cond_t *cond); +extern int pthread_cond_broadcast(pthread_cond_t *cond); + +/* + * Simple thread creation implementation using pthread API + */ +typedef struct { + HANDLE handle; + void *(*start_routine)(void*); + void *arg; +} pthread_t; + +extern int pthread_create(pthread_t *thread, const void *unused, + void *(*start_routine)(void*), void *arg); + +/* + * To avoid the need of copying a struct, we use small macro wrapper to pass + * pointer to win32_pthread_join instead. + */ +#define pthread_join(a, b) win32_pthread_join(&(a), (b)) + +extern int win32_pthread_join(pthread_t *thread, void **value_ptr); + +/** + * pthread_once implementation based on the MS Windows One-Time Initialization + * (https://docs.microsoft.com/en-us/windows/desktop/Sync/one-time-initialization) + * APIs. + */ +typedef INIT_ONCE pthread_once_t; +#define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT +#define pthread_once blosc_internal_pthread_once /* Avoid symbol conflicts */ +static int blosc_internal_pthread_once(pthread_once_t* once_control, + void (*init_routine)(void)) { + BOOL pending; + InitOnceBeginInitialize(once_control, /*dwFlags=*/0, /*fPending=*/&pending, + NULL); + if (pending == TRUE) { + init_routine(); + InitOnceComplete(once_control, /*dwFlags=*/0, /*lpContext=*/NULL); + } + return 0; +} + +#endif /* PTHREAD_H */ diff --git a/c-blosc/blosc/win32/stdint-windows.h b/c-blosc/blosc/win32/stdint-windows.h new file mode 100644 index 0000000..4fe0ef9 --- /dev/null +++ b/c-blosc/blosc/win32/stdint-windows.h @@ -0,0 +1,259 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#if _MSC_VER >= 1600 // [ +#include +#else // ] _MSC_VER >= 1600 [ + +#include + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +# include +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +#endif + + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
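+// Illustration only (not part of the original header): once the exact-width
+// typedefs below are in scope, their sizes can be checked at compile time
+// with the classic negative-array-size trick; these typedef names are
+// hypothetical:
+//
+//   typedef char assert_int8_is_1_byte  [sizeof(int8_t)  == 1 ? 1 : -1];
+//   typedef char assert_int32_is_4_bytes[sizeof(int32_t) == 4 ? 1 : -1];
+//   typedef char assert_int64_is_8_bytes[sizeof(int64_t) == 8 ? 1 : -1];
+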
+#if (_MSC_VER < 1300) + typedef signed char int8_t; + typedef signed short int16_t; + typedef signed int int32_t; + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; +#else + typedef signed __int8 int8_t; + typedef signed __int16 int16_t; + typedef signed __int32 int32_t; + typedef unsigned __int8 uint8_t; + typedef unsigned __int16 uint16_t; + typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ + typedef signed __int64 intptr_t; + typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ + typedef _W64 signed int intptr_t; + typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer 
types + +#ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in +#ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with . +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +# define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +# define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#endif // _MSC_VER >= 1600 ] + +#endif // _MSC_STDINT_H_ ] diff --git a/c-blosc/internal-complibs/lz4-1.9.4/lz4.c b/c-blosc/internal-complibs/lz4-1.9.4/lz4.c new file mode 100644 index 0000000..654bfdf --- /dev/null +++ b/c-blosc/internal-complibs/lz4-1.9.4/lz4.c @@ -0,0 +1,2722 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2020, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Tuning parameters +**************************************/ +/* + * LZ4_HEAPMODE : + * Select how default compression functions will allocate memory for their hash table, + * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()). + */ +#ifndef LZ4_HEAPMODE +# define LZ4_HEAPMODE 0 +#endif + +/* + * LZ4_ACCELERATION_DEFAULT : + * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 + */ +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 + + +/*-************************************ +* CPU Feature Detection +**************************************/ +/* LZ4_FORCE_MEMORY_ACCESS + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets which assembly generation depends on alignment. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
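+ * For illustration (not part of the original comment), the three methods
+ * come down to reads of roughly this shape, where LZ4_unalign is the
+ * packed union defined further below:
+ *   Method 0 :  U16 v; memcpy(&v, ptr, sizeof v);     // safe default
+ *   Method 1 :  v = ((const LZ4_unalign*)ptr)->u16;   // packed-struct access
+ *   Method 2 :  v = *(const U16*)ptr;                 // direct, non-portable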
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef LZ4_FORCE_MEMORY_ACCESS /* can be defined externally */ +# if defined(__GNUC__) && \ + ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) +# define LZ4_FORCE_MEMORY_ACCESS 2 +# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) +# define LZ4_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/* + * LZ4_FORCE_SW_BITCOUNT + * Define this parameter if your target system or compiler does not support hardware bit count + */ +#if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ +# define LZ4_FORCE_SW_BITCOUNT +#endif + + + +/*-************************************ +* Dependency +**************************************/ +/* + * LZ4_SRC_INCLUDED: + * Amalgamation flag, whether lz4.c is included + */ +#ifndef LZ4_SRC_INCLUDED +# define LZ4_SRC_INCLUDED 1 +#endif + +#ifndef LZ4_STATIC_LINKING_ONLY +#define LZ4_STATIC_LINKING_ONLY +#endif + +#ifndef LZ4_DISABLE_DEPRECATE_WARNINGS +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */ +#endif + +#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ +#include "lz4.h" +/* see also "memory routines" below */ + + +/*-************************************ +* Compiler Options +**************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 6237) /* disable: C6237: conditional expression is always 0 */ +#endif /* _MSC_VER */ + +#ifndef LZ4_FORCE_INLINE +# ifdef _MSC_VER /* Visual Studio */ +# define LZ4_FORCE_INLINE static __forceinline +# else +# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# ifdef __GNUC__ +# define LZ4_FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define LZ4_FORCE_INLINE static inline +# endif +# else +# define LZ4_FORCE_INLINE static +# endif /* __STDC_VERSION__ */ +# endif /* _MSC_VER */ +#endif /* LZ4_FORCE_INLINE */ + +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE + * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, + * together with a simple 8-byte copy loop as a fall-back path. + * However, this optimization hurts the decompression speed by >30%, + * because the execution does not go to the optimized loop + * for typical compressible data, and all of the preamble checks + * before going to the fall-back path become useless overhead. + * This optimization happens only with the -O3 flag, and -O2 generates + * a simple 8-byte copy loop. + * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8 + * functions are annotated with __attribute__((optimize("O2"))), + * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute + * of LZ4_wildCopy8 does not affect the compression speed. 
+ */ +#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) +#else +# define LZ4_FORCE_O2 +#endif + +#if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#ifndef likely +#define likely(expr) expect((expr) != 0, 1) +#endif +#ifndef unlikely +#define unlikely(expr) expect((expr) != 0, 0) +#endif + +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + + +/*-************************************ +* Memory routines +**************************************/ + +/*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION : + * Disable relatively high-level LZ4/HC functions that use dynamic memory + * allocation functions (malloc(), calloc(), free()). + * + * Note that this is a compile-time switch. And since it disables + * public/stable LZ4 v1 API functions, we don't recommend using this + * symbol to generate a library for distribution. + * + * The following public functions are removed when this symbol is defined. + * - lz4 : LZ4_createStream, LZ4_freeStream, + * LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated) + * - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC, + * LZ4_createHC (deprecated), LZ4_freeHC (deprecated) + * - lz4frame, lz4file : All LZ4F_* functions + */ +#if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +# define ALLOC(s) lz4_error_memory_allocation_is_disabled +# define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled +# define FREEMEM(p) lz4_error_memory_allocation_is_disabled +#elif defined(LZ4_USER_MEMORY_FUNCTIONS) +/* memory management functions can be customized by user project. + * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + +#if ! 
LZ4_FREESTANDING
+#  include <string.h>   /* memset, memcpy */
+#endif
+#if !defined(LZ4_memset)
+#  define LZ4_memset(p,v,s) memset((p),(v),(s))
+#endif
+#define MEM_INIT(p,v,s)   LZ4_memset((p),(v),(s))
+
+
+/*-************************************
+*  Common Constants
+**************************************/
+#define MINMATCH 4
+
+#define WILDCOPYLENGTH 8
+#define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
+#define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
+#define FASTLOOP_SAFE_DISTANCE 64
+static const int LZ4_minLength = (MFLIMIT+1);
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
+#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
+#  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
+#endif
+
+#define ML_BITS  4
+#define ML_MASK  ((1U<<ML_BITS)-1)
+#define RUN_BITS (8-ML_BITS)
+#define RUN_MASK ((1U<<RUN_BITS)-1)
+
+
+/*-************************************
+*  Error detection
+**************************************/
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
+#  include <assert.h>
+#else
+#  ifndef assert
+#    define assert(condition) ((void)0)
+#  endif
+#endif
+
+#define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
+
+#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
+#  include <stdio.h>
+   static int g_debuglog_enable = 1;
+#  define DEBUGLOG(l, ...) {                          \
+        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
+            fprintf(stderr, __FILE__ ": ");           \
+            fprintf(stderr, __VA_ARGS__);             \
+            fprintf(stderr, " \n");                   \
+    }   }
+#else
+#  define DEBUGLOG(l, ...) {}    /* disabled */
+#endif
+
+static int LZ4_isAligned(const void* ptr, size_t alignment)
+{
+    return ((size_t)ptr & (alignment -1)) == 0;
+}
+
+
+/*-************************************
+*  Types
+**************************************/
+#include <limits.h>
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  uint8_t BYTE;
+  typedef uint16_t U16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef uintptr_t uptrval;
+#else
+# if UINT_MAX != 4294967295UL
+#   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+# endif
+  typedef unsigned char       BYTE;
+  typedef unsigned short      U16;
+  typedef unsigned int        U32;
+  typedef   signed int        S32;
+  typedef unsigned long long  U64;
+  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
+#endif
+
+#if defined(__x86_64__)
+  typedef U64    reg_t;   /* 64-bits in x32 mode */
+#else
+  typedef size_t reg_t;   /* 32-bits in x32 mode */
+#endif
+
+typedef enum {
+    notLimited = 0,
+    limitedOutput = 1,
+    fillOutput = 2
+} limitedOutput_directive;
+
+
+/*-************************************
+*  Reading and writing into memory
+**************************************/
+
+/**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */ +#if !defined(LZ4_memcpy) +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +# else +# define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +# endif +#endif + +#if !defined(LZ4_memmove) +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4_memmove __builtin_memmove +# else +# define LZ4_memmove memmove +# endif +#endif + +static unsigned LZ4_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + + +#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2) +/* lie to the compiler about data alignment; use with caution */ + +static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; } +static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; } +static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; } + +static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } + +#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U16 u16; U32 u32; reg_t uArch; } __attribute__((packed)) LZ4_unalign; + +static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign*)ptr)->u16; } +static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign*)ptr)->u32; } +static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalign*)ptr)->uArch; } + +static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign*)memPtr)->u16 = value; } +static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign*)memPtr)->u32 = value; } + +#else /* safe and portable access using memcpy() */ + +static U16 LZ4_read16(const void* memPtr) +{ + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static U32 LZ4_read32(const void* memPtr) +{ + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static reg_t LZ4_read_ARCH(const void* memPtr) +{ + reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; +} + +static void LZ4_write16(void* memPtr, U16 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +static void LZ4_write32(void* memPtr, U32 value) +{ + LZ4_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* LZ4_FORCE_MEMORY_ACCESS */ + + +static U16 LZ4_readLE16(const void* memPtr) +{ + if (LZ4_isLittleEndian()) { + return LZ4_read16(memPtr); + } else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)((U16)p[0] + (p[1]<<8)); + } +} + +static void LZ4_writeLE16(void* memPtr, U16 value) +{ + if (LZ4_isLittleEndian()) { + LZ4_write16(memPtr, value); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE) value; + p[1] = (BYTE)(value>>8); + } +} + +/* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ +LZ4_FORCE_INLINE +void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d= 16. 
*/ +LZ4_FORCE_INLINE void +LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) +{ + BYTE* d = (BYTE*)dstPtr; + const BYTE* s = (const BYTE*)srcPtr; + BYTE* const e = (BYTE*)dstEnd; + + do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d= dstPtr + MINMATCH + * - there is at least 8 bytes available to write after dstEnd */ +LZ4_FORCE_INLINE void +LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) +{ + BYTE v[8]; + + assert(dstEnd >= dstPtr + MINMATCH); + + switch(offset) { + case 1: + MEM_INIT(v, *srcPtr, 8); + break; + case 2: + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); +#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */ +# pragma warning(push) +# pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */ +#endif + LZ4_memcpy(&v[4], v, 4); +#if defined(_MSC_VER) && (_MSC_VER <= 1933) /* MSVC 2022 ver 17.3 or earlier */ +# pragma warning(pop) +#endif + break; + case 4: + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); + break; + default: + LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); + return; + } + + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + while (dstPtr < dstEnd) { + LZ4_memcpy(dstPtr, v, 8); + dstPtr += 8; + } +} +#endif + + +/*-************************************ +* Common functions +**************************************/ +static unsigned LZ4_NbCommonBytes (reg_t val) +{ + assert(val != 0); + if (LZ4_isLittleEndian()) { + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT) +/*-************************************************************************************************* +* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11. +* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics +* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC. +****************************************************************************************************/ +# if defined(__clang__) && (__clang_major__ < 10) + /* Avoid undefined clang-cl intrinsics issue. + * See https://github.com/lz4/lz4/pull/1017 for details. 
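+ * (Descriptive note, not in the original source: in each branch below the
+ * trailing-zero bit count is shifted right by 3, converting a bit index
+ * into a count of whole identical low-address bytes.)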
*/ + return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3; +# else + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# endif +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctzll((U64)val) >> 3; +# else + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); +# endif + } else /* 32 bits */ { +# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_ctz((U32)val) >> 3; +# else + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; +# endif + } + } else /* Big Endian CPU */ { + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (unsigned)__builtin_clzll((U64)val) >> 3; +# else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ + static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. 
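+           Whichever variant is compiled in, the contract is the same
+           (illustrative note, not in the original source): LZ4_NbCommonBytes(x ^ y)
+           returns how many bytes x and y share at their lowest memory
+           addresses; e.g. 64-bit words holding "ABCDEFGH" and "ABCDEFxx"
+           yield 6, which LZ4_count() below converts into match length.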
*/
+            unsigned r;
+            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+#endif
+# endif
+        } else /* 32 bits */ {
+# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+                        !defined(LZ4_FORCE_SW_BITCOUNT)
+            return (unsigned)__builtin_clz((U32)val) >> 3;
+# else
+            val >>= 8;
+            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+              (val + 0x00FF0000)) >> 24;
+            return (unsigned)val ^ 3;
+# endif
+        }
+    }
+}
+
+
+#define STEPSIZE sizeof(reg_t)
+LZ4_FORCE_INLINE
+unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
+{
+    const BYTE* const pStart = pIn;
+
+    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) {
+            pIn+=STEPSIZE; pMatch+=STEPSIZE;
+        } else {
+            return LZ4_NbCommonBytes(diff);
+    }   }
+
+    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
+        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
+        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
+        pIn += LZ4_NbCommonBytes(diff);
+        return (unsigned)(pIn - pStart);
+    }
+
+    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (unsigned)(pIn - pStart);
+}
+
+
+#ifndef LZ4_COMMONDEFS_ONLY
+/*-************************************
+*  Local Constants
+**************************************/
+static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));
+static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
+
+
+/*-************************************
+*  Local Structures and types
+**************************************/
+typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;
+
+/**
+ * This enum distinguishes several different modes of accessing previous
+ * content in the stream.
+ *
+ * - noDict        : There is no preceding content.
+ * - withPrefix64k : Table entries up to ctx->dictSize before the current blob
+ *                   being compressed are valid and refer to the preceding
+ *                   content (of length ctx->dictSize), which is available
+ *                   contiguously preceding in memory the content currently
+ *                   being compressed.
+ * - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
+ *                   else in memory, starting at ctx->dictionary with length
+ *                   ctx->dictSize.
+ * - usingDictCtx  : Everything concerning the preceding content is
+ *                   in a separate context, pointed to by ctx->dictCtx.
+ *                   ctx->dictionary, ctx->dictSize, and table entries
+ *                   in the current context that refer to positions
+ *                   preceding the beginning of the current compression are
+ *                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
+ *                   ->dictSize describe the location and size of the preceding
+ *                   content, and matches are found by looking in the ctx
+ *                   ->dictCtx->hashTable.
+ */ +typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive; +typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; + + +/*-************************************ +* Local Utils +**************************************/ +int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } +const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } +int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } +int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); } + + +/*-**************************************** +* Internal Definitions, used only in Tests +*******************************************/ +#if defined (__cplusplus) +extern "C" { +#endif + +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize); + +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize); +int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, + int compressedSize, int targetOutputSize, int dstCapacity, + const void* dictStart, size_t dictSize); +#if defined (__cplusplus) +} +#endif + +/*-****************************** +* Compression functions +********************************/ +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +{ + if (tableType == byU16) + return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +{ + const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; + if (LZ4_isLittleEndian()) { + const U64 prime5bytes = 889523592379ULL; + return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog)); + } else { + const U64 prime8bytes = 11400714785074694791ULL; + return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog)); + } +} + +LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType) +{ + if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType); + return LZ4_hash4(LZ4_read32(p), tableType); +} + +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: { /* illegal! */ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +{ + switch (tableType) + { + default: /* fallthrough */ + case clearedTable: /* fallthrough */ + case byPtr: { /* illegal! */ assert(0); return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; } + case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, + void* tableBase, tableType_t const tableType, + const BYTE* srcBase) +{ + switch (tableType) + { + case clearedTable: { /* illegal! 
*/ assert(0); return; } + case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = p; return; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); return; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); return; } + } +} + +LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +/* LZ4_getIndexOnHash() : + * Index of match position registered in hash table. + * hash position must be calculated by using base+index, or dictBase+index. + * Assumption 1 : only valid if tableType == byU32 or byU16. + * Assumption 2 : h is presumed valid (within limits of hash table) + */ +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +{ + LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); + if (tableType == byU32) { + const U32* const hashTable = (const U32*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-2))); + return hashTable[h]; + } + if (tableType == byU16) { + const U16* const hashTable = (const U16*) tableBase; + assert(h < (1U << (LZ4_MEMORY_USAGE-1))); + return hashTable[h]; + } + assert(0); return 0; /* forbidden case */ +} + +static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; } + if (tableType == byU32) { const U32* const hashTable = (const U32*) tableBase; return hashTable[h] + srcBase; } + { const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */ +} + +LZ4_FORCE_INLINE const BYTE* +LZ4_getPosition(const BYTE* p, + const void* tableBase, tableType_t tableType, + const BYTE* srcBase) +{ + U32 const h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + +LZ4_FORCE_INLINE void +LZ4_prepareTable(LZ4_stream_t_internal* const cctx, + const int inputSize, + const tableType_t tableType) { + /* If the table hasn't been used, it's guaranteed to be zeroed out, and is + * therefore safe to use no matter what mode we're in. Otherwise, we figure + * out if it's safe to leave as is or whether it needs to be reset. + */ + if ((tableType_t)cctx->tableType != clearedTable) { + assert(inputSize >= 0); + if ((tableType_t)cctx->tableType != tableType + || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) + || ((tableType == byU32) && cctx->currentOffset > 1 GB) + || tableType == byPtr + || inputSize >= 4 KB) + { + DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); + MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); + cctx->currentOffset = 0; + cctx->tableType = (U32)clearedTable; + } else { + DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); + } + } + + /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back, + * is faster than compressing without a gap. + * However, compressing with currentOffset == 0 is faster still, + * so we preserve that case. 
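+ *
+ * Worked example (illustrative, not in the original comment): after
+ * currentOffset += 64 KB, every index already in the table is at least
+ * 65536 positions behind any new position, i.e. farther away than
+ * LZ4_DISTANCE_MAX (65535), so the distance check in the match finder
+ * rejects stale entries without the table ever being cleared.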
+ */ + if (cctx->currentOffset != 0 && tableType == byU32) { + DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset"); + cctx->currentOffset += 64 KB; + } + + /* Finally, clear history */ + cctx->dictCtx = NULL; + cctx->dictionary = NULL; + cctx->dictSize = 0; +} + +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time. + * Presumed already validated at this stage: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( + LZ4_stream_t_internal* const cctx, + const char* const source, + char* const dest, + const int inputSize, + int* inputConsumed, /* only written when outputDirective == fillOutput */ + const int maxOutputSize, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + int result; + const BYTE* ip = (const BYTE*) source; + + U32 const startIndex = cctx->currentOffset; + const BYTE* base = (const BYTE*) source - startIndex; + const BYTE* lowLimit; + + const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx; + const BYTE* const dictionary = + dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary; + const U32 dictSize = + dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize; + const U32 dictDelta = (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0; /* make indexes in dictCtx comparable with index in current context */ + + int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); + U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ + const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary; + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; + const BYTE* const matchlimit = iend - LASTLITERALS; + + /* the dictCtx currentOffset is indexed on the start of the dictionary, + * while a dictionary in the current context precedes the currentOffset */ + const BYTE* dictBase = (dictionary == NULL) ? NULL : + (dictDirective == usingDictCtx) ? + dictionary + dictSize - dictCtx->currentOffset : + dictionary + dictSize - startIndex; + + BYTE* op = (BYTE*) dest; + BYTE* const olimit = op + maxOutputSize; + + U32 offset = 0; + U32 forwardH; + + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); + /* If init conditions are not met, we don't have to mark stream + * as having dirty context, since no action was taken yet */ + if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ + if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ + if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ + assert(acceleration >= 1); + + lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0); + + /* Update context state */ + if (dictDirective == usingDictCtx) { + /* Subsequent linked blocks can't use the dictionary. */ + /* Instead, they use the block we just compressed. 
*/ + cctx->dictCtx = NULL; + cctx->dictSize = (U32)inputSize; + } else { + cctx->dictSize += (U32)inputSize; + } + cctx->currentOffset += (U32)inputSize; + cctx->tableType = (U32)tableType; + + if (inputSizehashTable, tableType, base); + ip++; forwardH = LZ4_hashPosition(ip, tableType); + + /* Main Loop */ + for ( ; ; ) { + const BYTE* match; + BYTE* token; + const BYTE* filledIp; + + /* Find a match */ + if (tableType == byPtr) { + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType, base); + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType, base); + + } while ( (match+LZ4_DISTANCE_MAX < ip) + || (LZ4_read32(match) != LZ4_read32(ip)) ); + + } else { /* byU32, byU16 */ + + const BYTE* forwardIp = ip; + int step = 1; + int searchMatchNb = acceleration << LZ4_skipTrigger; + do { + U32 const h = forwardH; + U32 const current = (U32)(forwardIp - base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex <= current); + assert(forwardIp - base < (ptrdiff_t)(2 GB - 1)); + ip = forwardIp; + forwardIp += step; + step = (searchMatchNb++ >> LZ4_skipTrigger); + + if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals; + assert(ip < mflimitPlusOne); + + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + assert(tableType == byU32); + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + matchIndex += dictDelta; /* make dictCtx index comparable with current context */ + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else if (dictDirective == usingExtDict) { + if (matchIndex < startIndex) { + DEBUGLOG(7, "extDict candidate: matchIndex=%5u < startIndex=%5u", matchIndex, startIndex); + assert(startIndex - matchIndex >= MINMATCH); + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = dictionary; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; + } + } else { /* single continuous memory segment */ + match = base + matchIndex; + } + forwardH = LZ4_hashPosition(forwardIp, tableType); + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + + DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex); + if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */ + assert(matchIndex < current); + if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX)) + && (matchIndex+LZ4_DISTANCE_MAX < current)) { + continue; + } /* too far */ + assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */ + + if (LZ4_read32(match) == LZ4_read32(ip)) { + if (maybe_extMem) offset = current - matchIndex; + break; /* match found */ + } + + } while(1); + } + + /* Catch up */ + filledIp = ip; + while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; } + + /* Encode Literals */ + { unsigned const litLength = (unsigned)(ip - anchor); + token = op++; + if ((outputDirective == limitedOutput) && /* Check output buffer overflow */ + 
(unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) { + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + if ((outputDirective == fillOutput) && + (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) { + op--; + goto _last_literals; + } + if (litLength >= RUN_MASK) { + int len = (int)(litLength - RUN_MASK); + *token = (RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(litLength< olimit)) { + /* the match was too close to the end, rewind and go to last literals */ + op = token; + goto _last_literals; + } + + /* Encode Offset */ + if (maybe_extMem) { /* static test */ + DEBUGLOG(6, " with offset=%u (ext if > %i)", offset, (int)(ip - (const BYTE*)source)); + assert(offset <= LZ4_DISTANCE_MAX && offset > 0); + LZ4_writeLE16(op, (U16)offset); op+=2; + } else { + DEBUGLOG(6, " with offset=%u (same segment)", (U32)(ip - match)); + assert(ip-match <= LZ4_DISTANCE_MAX); + LZ4_writeLE16(op, (U16)(ip - match)); op+=2; + } + + /* Encode MatchLength */ + { unsigned matchCode; + + if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx) + && (lowLimit==dictionary) /* match within extDict */ ) { + const BYTE* limit = ip + (dictEnd-match); + assert(dictEnd > match); + if (limit > matchlimit) limit = matchlimit; + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit); + ip += (size_t)matchCode + MINMATCH; + if (ip==limit) { + unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit); + matchCode += more; + ip += more; + } + DEBUGLOG(6, " with matchLength=%u starting in extDict", matchCode+MINMATCH); + } else { + matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit); + ip += (size_t)matchCode + MINMATCH; + DEBUGLOG(6, " with matchLength=%u", matchCode+MINMATCH); + } + + if ((outputDirective) && /* Check output buffer overflow */ + (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) { + if (outputDirective == fillOutput) { + /* Match description too long : reduce it */ + U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255; + ip -= matchCode - newMatchCode; + assert(newMatchCode < matchCode); + matchCode = newMatchCode; + if (unlikely(ip <= filledIp)) { + /* We have already filled up to filledIp so if ip ends up less than filledIp + * we have positions in the hash table beyond the current position. This is + * a problem if we reuse the hash table. So we have to remove these positions + * from the hash table. + */ + const BYTE* ptr; + DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip)); + for (ptr = ip; ptr <= filledIp; ++ptr) { + U32 const h = LZ4_hashPosition(ptr, tableType); + LZ4_clearHash(h, cctx->hashTable, tableType); + } + } + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */ + } + } + if (matchCode >= ML_MASK) { + *token += ML_MASK; + matchCode -= ML_MASK; + LZ4_write32(op, 0xFFFFFFFF); + while (matchCode >= 4*255) { + op+=4; + LZ4_write32(op, 0xFFFFFFFF); + matchCode -= 4*255; + } + op += matchCode / 255; + *op++ = (BYTE)(matchCode % 255); + } else + *token += (BYTE)(matchCode); + } + /* Ensure we have enough space for the last literals. 
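+       Recap of the sequence being emitted here (illustrative, not in the
+       original source): one token byte carries the literal count in its
+       high 4 bits and the match length minus MINMATCH in its low 4 bits;
+       a count that saturates its 4-bit field continues in 255-valued
+       extension bytes; the literal bytes follow the token and its literal
+       extensions, and the 2-byte little-endian offset precedes the
+       match-length extensions.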
*/ + assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit)); + + anchor = ip; + + /* Test end of chunk */ + if (ip >= mflimitPlusOne) break; + + /* Fill table */ + LZ4_putPosition(ip-2, cctx->hashTable, tableType, base); + + /* Test next position */ + if (tableType == byPtr) { + + match = LZ4_getPosition(ip, cctx->hashTable, tableType, base); + LZ4_putPosition(ip, cctx->hashTable, tableType, base); + if ( (match+LZ4_DISTANCE_MAX >= ip) + && (LZ4_read32(match) == LZ4_read32(ip)) ) + { token=op++; *token=0; goto _next_match; } + + } else { /* byU32, byU16 */ + + U32 const h = LZ4_hashPosition(ip, tableType); + U32 const current = (U32)(ip-base); + U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType); + assert(matchIndex < current); + if (dictDirective == usingDictCtx) { + if (matchIndex < startIndex) { + /* there was no match, try the dictionary */ + matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + matchIndex += dictDelta; + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else if (dictDirective==usingExtDict) { + if (matchIndex < startIndex) { + assert(dictBase); + match = dictBase + matchIndex; + lowLimit = dictionary; /* required for match length counter */ + } else { + match = base + matchIndex; + lowLimit = (const BYTE*)source; /* required for match length counter */ + } + } else { /* single memory segment */ + match = base + matchIndex; + } + LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType); + assert(matchIndex < current); + if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1) + && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current)) + && (LZ4_read32(match) == LZ4_read32(ip)) ) { + token=op++; + *token=0; + if (maybe_extMem) offset = current - matchIndex; + DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i", + (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source)); + goto _next_match; + } + } + + /* Prepare next loop */ + forwardH = LZ4_hashPosition(++ip, tableType); + + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRun = (size_t)(iend - anchor); + if ( (outputDirective) && /* Check output buffer overflow */ + (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) { + if (outputDirective == fillOutput) { + /* adapt lastRun to fill 'dst' */ + assert(olimit >= op); + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ + } else { + assert(outputDirective == limitedOutput); + return 0; /* cannot compress within `dst` budget. 
+                           Stored indexes in hash table are nonetheless fine */
+            }
+        }
+        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
+        if (lastRun >= RUN_MASK) {
+            size_t accumulator = lastRun - RUN_MASK;
+            *op++ = RUN_MASK << ML_BITS;
+            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
+            *op++ = (BYTE) accumulator;
+        } else {
+            *op++ = (BYTE)(lastRun<<ML_BITS);
+        }
+        LZ4_memcpy(op, anchor, lastRun);
+        ip = anchor + lastRun;
+        op += lastRun;
+    }
+
+    if (outputDirective == fillOutput) {
+        *inputConsumed = (int) (((const char*)ip)-source);
+    }
+    result = (int)(((char*)op) - dest);
+    assert(result > 0);
+    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
+    return result;
+}
+
+/** LZ4_compress_generic() :
+ *  inlined, to ensure branches are decided at compilation time;
+ *  takes care of src == (NULL, 0)
+ *  and forwards the rest to LZ4_compress_generic_validated */
+LZ4_FORCE_INLINE int LZ4_compress_generic(
+                 LZ4_stream_t_internal* const cctx,
+                 const char* const src,
+                 char* const dst,
+                 const int srcSize,
+                 int *inputConsumed, /* only written when outputDirective == fillOutput */
+                 const int dstCapacity,
+                 const limitedOutput_directive outputDirective,
+                 const tableType_t tableType,
+                 const dict_directive dictDirective,
+                 const dictIssue_directive dictIssue,
+                 const int acceleration)
+{
+    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
+             srcSize, dstCapacity);
+
+    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }  /* Unsupported srcSize, too large (or negative) */
+    if (srcSize == 0) {   /* src == NULL supported if srcSize == 0 */
+        if (outputDirective != notLimited && dstCapacity <= 0) return 0;  /* no output, can't write anything */
+        DEBUGLOG(5, "Generating an empty block");
+        assert(outputDirective == notLimited || dstCapacity >= 1);
+        assert(dst != NULL);
+        dst[0] = 0;
+        if (outputDirective == fillOutput) {
+            assert (inputConsumed != NULL);
+            *inputConsumed = 0;
+        }
+        return 1;
+    }
+    assert(src != NULL);
+
+    return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
+                inputConsumed, /* only written into if outputDirective == fillOutput */
+                dstCapacity, outputDirective,
+                tableType, dictDirective, dictIssue, acceleration);
+}
+
+
+int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
+{
+    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
+    assert(ctx != NULL);
+    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
+    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
+    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
+        }
+    } else {
+        if (inputSize < LZ4_64Klimit) {
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
+        } else {
+            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
+            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
+        }
+    }
+}
+
+/**
+ * LZ4_compress_fast_extState_fastReset() :
+ * A variant of LZ4_compress_fast_extState().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
It is only safe + * to call if the state buffer is known to be correctly initialized already + * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of + * "correctly initialized"). + */ +int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) +{ + LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + if (dstCapacity >= LZ4_compressBound(srcSize)) { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration); + } + } else { + if (srcSize < LZ4_64Klimit) { + const tableType_t tableType = byU16; + LZ4_prepareTable(ctx, srcSize, tableType); + if (ctx->currentOffset) { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration); + } else { + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } else { + const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + LZ4_prepareTable(ctx, srcSize, tableType); + return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration); + } + } +} + + +int LZ4_compress_fast(const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration) +{ + int result; +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctxPtr == NULL) return 0; +#else + LZ4_stream_t ctx; + LZ4_stream_t* const ctxPtr = &ctx; +#endif + result = LZ4_compress_fast_extState(ctxPtr, source, dest, inputSize, maxOutputSize, acceleration); + +#if (LZ4_HEAPMODE) + FREEMEM(ctxPtr); +#endif + return result; +} + + +int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputSize) +{ + return LZ4_compress_fast(src, dst, srcSize, maxOutputSize, 1); +} + + +/* Note!: This function leaves the stream in an unclean/broken state! + * It is not safe to subsequently use the same state with a _fastReset() or + * _continue() call without resetting it. 
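+ *
+ * Editor's note (illustrative, not upstream text) : one safe pattern is a
+ * full re-init of the same state before any further use, e.g.
+ *     LZ4_compress_destSize_extState(&state, src, dst, &srcSize, targetDstSize);
+ *     LZ4_initStream(&state, sizeof(state));   // full reset, state is usable again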
*/ +static int LZ4_compress_destSize_extState (LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ + void* const s = LZ4_initStream(state, sizeof (*state)); + assert(s != NULL); (void)s; + + if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) { /* compression success is guaranteed */ + return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, 1); + } else { + if (*srcSizePtr < LZ4_64Klimit) { + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, 1); + } else { + tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32; + return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, 1); + } } +} + + +int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize) +{ +#if (LZ4_HEAPMODE) + LZ4_stream_t* ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); /* malloc-calloc always properly aligned */ + if (ctx == NULL) return 0; +#else + LZ4_stream_t ctxBody; + LZ4_stream_t* ctx = &ctxBody; +#endif + + int result = LZ4_compress_destSize_extState(ctx, src, dst, srcSizePtr, targetDstSize); + +#if (LZ4_HEAPMODE) + FREEMEM(ctx); +#endif + return result; +} + + + +/*-****************************** +* Streaming functions +********************************/ + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_stream_t* LZ4_createStream(void) +{ + LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t)); + LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal)); + DEBUGLOG(4, "LZ4_createStream %p", lz4s); + if (lz4s == NULL) return NULL; + LZ4_initStream(lz4s, sizeof(*lz4s)); + return lz4s; +} +#endif + +static size_t LZ4_stream_t_alignment(void) +{ +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ +#endif +} + +LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) +{ + DEBUGLOG(5, "LZ4_initStream"); + if (buffer == NULL) { return NULL; } + if (size < sizeof(LZ4_stream_t)) { return NULL; } + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); + return (LZ4_stream_t*)buffer; +} + +/* resetStream is now deprecated, + * prefer initStream() which is more general */ +void LZ4_resetStream (LZ4_stream_t* LZ4_stream) +{ + DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); +} + +void LZ4_resetStream_fast(LZ4_stream_t* ctx) { + LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32); +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +int LZ4_freeStream (LZ4_stream_t* LZ4_stream) +{ + if (!LZ4_stream) return 0; /* support free on NULL */ + DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream); + FREEMEM(LZ4_stream); + return (0); +} +#endif + + +#define HASH_UNIT sizeof(reg_t) +int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) +{ + LZ4_stream_t_internal* dict = &LZ4_dict->internal_donotuse; + const tableType_t tableType = byU32; + const BYTE* p = (const BYTE*)dictionary; + const BYTE* const dictEnd = p + dictSize; + const BYTE* base; + + DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict); + + /* It's necessary to reset the context, + * and not just 
+     * continue it with prepareTable()
+     * to avoid any risk of generating overflowing matchIndex
+     * when compressing using this dictionary */
+    LZ4_resetStream(LZ4_dict);
+
+    /* We always increment the offset by 64 KB, since, if the dict is longer,
+     * we truncate it to the last 64k, and if it's shorter, we still want to
+     * advance by a whole window length so we can provide the guarantee that
+     * there are only valid offsets in the window, which allows an optimization
+     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
+     * dictionary isn't a full 64k. */
+    dict->currentOffset += 64 KB;
+
+    if (dictSize < (int)HASH_UNIT) {
+        return 0;
+    }
+
+    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
+    base = dictEnd - dict->currentOffset;
+    dict->dictionary = p;
+    dict->dictSize = (U32)(dictEnd - p);
+    dict->tableType = (U32)tableType;
+
+    while (p <= dictEnd-HASH_UNIT) {
+        LZ4_putPosition(p, dict->hashTable, tableType, base);
+        p+=3;
+    }
+
+    return (int)dict->dictSize;
+}
+
+void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
+{
+    const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL :
+        &(dictionaryStream->internal_donotuse);
+
+    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
+             workingStream, dictionaryStream,
+             dictCtx != NULL ? dictCtx->dictSize : 0);
+
+    if (dictCtx != NULL) {
+        /* If the current offset is zero, we will never look in the
+         * external dictionary context, since there is no value a table
+         * entry can take that indicates a miss. In that case, we need
+         * to bump the offset to something non-zero.
+         */
+        if (workingStream->internal_donotuse.currentOffset == 0) {
+            workingStream->internal_donotuse.currentOffset = 64 KB;
+        }
+
+        /* Don't actually attach an empty dictionary.
+         */
+        if (dictCtx->dictSize == 0) {
+            dictCtx = NULL;
+        }
+    }
+    workingStream->internal_donotuse.dictCtx = dictCtx;
+}
+
+
+static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
+{
+    assert(nextSize >= 0);
+    if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
+        /* rescale hash table */
+        U32 const delta = LZ4_dict->currentOffset - 64 KB;
+        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
+        int i;
+        DEBUGLOG(4, "LZ4_renormDictT");
+        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
+            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
+            else LZ4_dict->hashTable[i] -= delta;
+        }
+        LZ4_dict->currentOffset = 64 KB;
+        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
+        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
+    }
+}
+
+
+int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
+                                const char* source, char* dest,
+                                int inputSize, int maxOutputSize,
+                                int acceleration)
+{
+    const tableType_t tableType = byU32;
+    LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
+    const char* dictEnd = streamPtr->dictSize ?
(const char*)streamPtr->dictionary + streamPtr->dictSize : NULL; + + DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize); + + LZ4_renormDictT(streamPtr, inputSize); /* fix index overflow */ + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; + + /* invalidate tiny dictionaries */ + if ( (streamPtr->dictSize < 4) /* tiny dictionary : not enough for a hash */ + && (dictEnd != source) /* prefix mode */ + && (inputSize > 0) /* tolerance : don't lose history, in case next invocation would use prefix mode */ + && (streamPtr->dictCtx == NULL) /* usingDictCtx */ + ) { + DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary); + /* remove dictionary existence from history, to employ faster prefix mode */ + streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)source; + dictEnd = source; + } + + /* Check overlapping input/dictionary space */ + { const char* const sourceEnd = source + inputSize; + if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) { + streamPtr->dictSize = (U32)(dictEnd - sourceEnd); + if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB; + if (streamPtr->dictSize < 4) streamPtr->dictSize = 0; + streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize; + } + } + + /* prefix mode : source data follows dictionary */ + if (dictEnd == source) { + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration); + else + return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration); + } + + /* external dictionary mode */ + { int result; + if (streamPtr->dictCtx) { + /* We depend here on the fact that dictCtx'es (produced by + * LZ4_loadDict) guarantee that their tables contain no references + * to offsets between dictCtx->currentOffset - 64 KB and + * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe + * to use noDictIssue even when the dict isn't a full 64 KB. + */ + if (inputSize > 4 KB) { + /* For compressing large blobs, it is faster to pay the setup + * cost to copy the dictionary's tables into the active context, + * so that the compression loop is only looking into one table. 
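+ * (Editor's note : with the default LZ4_MEMORY_USAGE of 14, the copied state
+ * is dominated by a 16 KB hash table, a setup cost that amortizes over a
+ * 4 KB+ input but would dominate on a tiny block.)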
+ */ + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); + } + } else { /* small data <= 4 KB */ + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); + } + } + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)inputSize; + return result; + } +} + + +/* Hidden debug function, to force-test external dictionary mode */ +int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize) +{ + LZ4_stream_t_internal* streamPtr = &LZ4_dict->internal_donotuse; + int result; + + LZ4_renormDictT(streamPtr, srcSize); + + if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1); + } else { + result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1); + } + + streamPtr->dictionary = (const BYTE*)source; + streamPtr->dictSize = (U32)srcSize; + + return result; +} + + +/*! LZ4_saveDict() : + * If previously compressed data block is not guaranteed to remain available at its memory location, + * save it into a safer place (char* safeBuffer). + * Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable, + * one can therefore call LZ4_compress_fast_continue() right after. + * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error. + */ +int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) +{ + LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse; + + DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer); + + if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ + if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } + + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) { + const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize; + assert(dict->dictionary); + LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize); + } + + dict->dictionary = (const BYTE*)safeBuffer; + dict->dictSize = (U32)dictSize; + + return dictSize; +} + + + +/*-******************************* + * Decompression functions + ********************************/ + +typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? 
+                          (a) : (b) )
+
+
+/* variant for decompress_unsafe()
+ * does not know end of input
+ * presumes input is well formed
+ * note : will consume at least one byte */
+size_t read_long_length_no_check(const BYTE** pp)
+{
+    size_t b, l = 0;
+    do { b = **pp; (*pp)++; l += b; } while (b==255);
+    DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1)
+    return l;
+}
+
+/* core decoder variant for LZ4_decompress_fast*()
+ * for legacy support only : these entry points are deprecated.
+ * - Presumes input is correctly formed (no defense vs malformed inputs)
+ * - Does not know input size (presume input buffer is "large enough")
+ * - Decompress a full block (only)
+ * @return : nb of bytes read from input.
+ * Note : this variant is not optimized for speed, just for maintenance.
+ *        the goal is to remove support of decompress_fast*() variants by v2.0
+**/
+LZ4_FORCE_INLINE int
+LZ4_decompress_unsafe_generic(
+                 const BYTE* const istart,
+                 BYTE* const ostart,
+                 int decompressedSize,
+
+                 size_t prefixSize,
+                 const BYTE* const dictStart,  /* only if dict==usingExtDict */
+                 const size_t dictSize         /* note: =0 if dictStart==NULL */
+                 )
+{
+    const BYTE* ip = istart;
+    BYTE* op = (BYTE*)ostart;
+    BYTE* const oend = ostart + decompressedSize;
+    const BYTE* const prefixStart = ostart - prefixSize;
+
+    DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
+    if (dictStart == NULL) assert(dictSize == 0);
+
+    while (1) {
+        /* start new sequence */
+        unsigned token = *ip++;
+
+        /* literals */
+        {   size_t ll = token >> ML_BITS;
+            if (ll==15) {
+                /* long literal length */
+                ll += read_long_length_no_check(&ip);
+            }
+            if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
+            LZ4_memmove(op, ip, ll); /* support in-place decompression */
+            op += ll;
+            ip += ll;
+            if ((size_t)(oend-op) < MFLIMIT) {
+                if (op==oend) break; /* end of block */
+                DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
+                /* incorrect end of block :
+                 * last match must start at least MFLIMIT==12 bytes before end of output block */
+                return -1;
+        }   }
+
+        /* match */
+        {   size_t ml = token & 15;
+            size_t const offset = LZ4_readLE16(ip);
+            ip+=2;
+
+            if (ml==15) {
+                /* long match length */
+                ml += read_long_length_no_check(&ip);
+            }
+            ml += MINMATCH;
+
+            if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */
+
+            {   const BYTE* match = op - offset;
+
+                /* out of range */
+                if (offset > (size_t)(op - prefixStart) + dictSize) {
+                    DEBUGLOG(6, "offset out of range");
+                    return -1;
+                }
+
+                /* check special case : extDict */
+                if (offset > (size_t)(op - prefixStart)) {
+                    /* extDict scenario */
+                    const BYTE* const dictEnd = dictStart + dictSize;
+                    const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
+                    size_t const extml = (size_t)(dictEnd - extMatch);
+                    if (extml > ml) {
+                        /* match entirely within extDict */
+                        LZ4_memmove(op, extMatch, ml);
+                        op += ml;
+                        ml = 0;
+                    } else {
+                        /* match split between extDict & prefix */
+                        LZ4_memmove(op, extMatch, extml);
+                        op += extml;
+                        ml -= extml;
+                    }
+                    match = prefixStart;
+                }
+
+                /* match copy - slow variant, supporting overlap copy */
+                {   size_t u;
+                    for (u=0; u<ml; u++) {
+                        op[u] = match[u];
+            }   }   }
+
+            op += ml;
+            if ((size_t)(oend-op) < MFLIMIT) {
+                if (op==oend) break; /* end of block */
+                DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
+                /* incorrect end of block :
+                 * last match must start at least MFLIMIT==12 bytes before end of output block */
+                return -1;
+        }   }
+    }   /* while (1) */
+
+    /* end of block : report nb of bytes read from input */
+    return (int)(ip - istart);
+}
+
+
+/* Read the variable-length literal or match length.
+ *
+ * @ip : input pointer
+ * @ilimit : position after which, if length is not fully decoded, input is necessarily corrupted.
+ * @initial_check - check ip >= ipmax before start of loop. Returns initial_error if so.
+ * @error (output) - error code. Must be set to 0 before call.
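+ *
+ * Editor's example (not upstream doc) : the encoded length is a byte sum.
+ * Each 255 byte means "add 255 and keep reading"; any byte below 255
+ * terminates the run. So the input bytes {255, 255, 13} decode to
+ * 255+255+13 = 523, consuming 3 input bytes.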
+**/ +typedef size_t Rvl_t; +static const Rvl_t rvl_error = (Rvl_t)(-1); +LZ4_FORCE_INLINE Rvl_t +read_variable_length(const BYTE** ip, const BYTE* ilimit, + int initial_check) +{ + Rvl_t s, length = 0; + assert(ip != NULL); + assert(*ip != NULL); + assert(ilimit != NULL); + if (initial_check && unlikely((*ip) >= ilimit)) { /* read limit reached */ + return rvl_error; + } + do { + s = **ip; + (*ip)++; + length += s; + if (unlikely((*ip) > ilimit)) { /* read limit reached */ + return rvl_error; + } + /* accumulator overflow detection (32-bit mode only) */ + if ((sizeof(length)<8) && unlikely(length > ((Rvl_t)(-1)/2)) ) { + return rvl_error; + } + } while (s==255); + + return length; +} + +/*! LZ4_decompress_generic() : + * This generic decompression function covers all use cases. + * It shall be instantiated several times, using different sets of directives. + * Note that it is important for performance that this function really get inlined, + * in order to remove useless branches during compilation optimization. + */ +LZ4_FORCE_INLINE int +LZ4_decompress_generic( + const char* const src, + char* const dst, + int srcSize, + int outputSize, /* If endOnInput==endOnInputSize, this value is `dstCapacity` */ + + earlyEnd_directive partialDecoding, /* full, partial */ + dict_directive dict, /* noDict, withPrefix64k, usingExtDict */ + const BYTE* const lowPrefix, /* always <= dst, == dst when no prefix */ + const BYTE* const dictStart, /* only if dict==usingExtDict */ + const size_t dictSize /* note : = 0 if noDict */ + ) +{ + if ((src == NULL) || (outputSize < 0)) { return -1; } + + { const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + BYTE* op = (BYTE*) dst; + BYTE* const oend = op + outputSize; + BYTE* cpy; + + const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize; + + const int checkOffset = (dictSize < (int)(64 KB)); + + + /* Set up the "end" pointers for the shortcut. */ + const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/; + const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/; + + const BYTE* match; + size_t offset; + unsigned token; + size_t length; + + + DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize); + + /* Special cases */ + assert(lowPrefix <= op); + if (unlikely(outputSize==0)) { + /* Empty output buffer */ + if (partialDecoding) return 0; + return ((srcSize==1) && (*ip==0)) ? 0 : -1; + } + if (unlikely(srcSize==0)) { return -1; } + + /* LZ4_FAST_DEC_LOOP: + * designed for modern OoO performance cpus, + * where copying reliably 32-bytes is preferable to an unpredictable branch. + * note : fast loop may show a regression for some client arm chips. 
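+ * (Editor's note : the loop below only runs while at least
+ * FASTLOOP_SAFE_DISTANCE bytes of output remain, which is what keeps its
+ * unconditional 32-byte wildcopies in bounds.)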
*/ +#if LZ4_FAST_DEC_LOOP + if ((oend - op) < FASTLOOP_SAFE_DISTANCE) { + DEBUGLOG(6, "skip fast decode loop"); + goto safe_decode; + } + + /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */ + while (1) { + /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */ + assert(oend - op >= FASTLOOP_SAFE_DISTANCE); + assert(ip < iend); + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); + if (addl == rvl_error) { goto _output_error; } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + + /* copy literals */ + cpy = op+length; + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; } + LZ4_wildCopy32(op, ip, cpy); + ip += length; op = cpy; + } else { + cpy = op+length; + DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length); + /* We don't need to check oend, since we check it once for each loop below */ + if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } + /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */ + LZ4_memcpy(op, ip, 16); + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + assert(match <= op); /* overflow check */ + + /* get matchlength */ + length = token & ML_MASK; + + if (length == ML_MASK) { + size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { goto _output_error; } + length += addl; + length += MINMATCH; + if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + } else { + length += MINMATCH; + if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { + goto safe_match_copy; + } + + /* Fastpath check: skip LZ4_wildCopy32 when true */ + if ((dict == withPrefix64k) || (match >= lowPrefix)) { + if (offset >= 8) { + assert(match >= lowPrefix); + assert(match <= op); + assert(op + 18 <= oend); + + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); + op += length; + continue; + } } } + + if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) { + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); + } else { + goto _output_error; /* end-of-block condition violated */ + } } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - 
copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) { *op++ = *copyFrom++; } + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + + /* copy match within block */ + cpy = op + length; + + assert((op <= oend) && (oend-op >= 32)); + if (unlikely(offset<16)) { + LZ4_memcpy_using_offset(op, match, cpy, offset); + } else { + LZ4_wildCopy32(op, match, cpy); + } + + op = cpy; /* wildcopy correction */ + } + safe_decode: +#endif + + /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */ + while (1) { + assert(ip < iend); + token = *ip++; + length = token >> ML_BITS; /* literal length */ + + /* A two-stage shortcut for the most common case: + * 1) If the literal length is 0..14, and there is enough space, + * enter the shortcut and copy 16 bytes on behalf of the literals + * (in the fast mode, only 8 bytes can be safely copied this way). + * 2) Further if the match length is 4..18, copy 18 bytes in a similar + * manner; but we ensure that there's enough space in the output for + * those 18 bytes earlier, upon entering the shortcut (in other words, + * there is a combined check for both stages). + */ + if ( (length != RUN_MASK) + /* strictly "less than" on input, to re-enter the loop with at least one byte */ + && likely((ip < shortiend) & (op <= shortoend)) ) { + /* Copy the literals */ + LZ4_memcpy(op, ip, 16); + op += length; ip += length; + + /* The second stage: prepare for match copying, decode full info. + * If it doesn't work out, the info won't be wasted. */ + length = token & ML_MASK; /* match length */ + offset = LZ4_readLE16(ip); ip += 2; + match = op - offset; + assert(match <= op); /* check overflow */ + + /* Do not deal with overlapping matches. */ + if ( (length != ML_MASK) + && (offset >= 8) + && (dict==withPrefix64k || match >= lowPrefix) ) { + /* Copy the match. */ + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); + op += length + MINMATCH; + /* Both stages worked, load the next token. */ + continue; + } + + /* The second stage didn't work out, but the info is ready. + * Propel it right to the point of match copying. */ + goto _copy_match; + } + + /* decode literal length */ + if (length == RUN_MASK) { + size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1); + if (addl == rvl_error) { goto _output_error; } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ + if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ + } + + /* copy literals */ + cpy = op+length; +#if LZ4_FAST_DEC_LOOP + safe_literal_copy: +#endif + LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH); + if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) { + /* We've either hit the input parsing restriction or the output parsing restriction. + * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. + */ + if (partialDecoding) { + /* Since we are partial decoding we may be in this block because of the output parsing + * restriction, which is not valid since the output buffer is allowed to be undersized. 
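+ * (Editor's example : an LZ4_decompress_safe_partial() call whose
+ * targetOutputSize is smaller than the block's full decompressed size
+ * lands here by design.)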
+ */ + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. + */ + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. + */ + if (cpy > oend) { + cpy = oend; + assert(op<=oend); + length = (size_t)(oend-op); + } + } else { + /* We must be on the last sequence (or invalid) because of the parsing limitations + * so check that we exactly consume the input and don't overrun the output buffer. + */ + if ((ip+length != iend) || (cpy > oend)) { + DEBUGLOG(6, "should have been last run of literals") + DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); + goto _output_error; + } + } + LZ4_memmove(op, ip, length); /* supports overlapping memory regions, for in-place decompression scenarios */ + ip += length; + op += length; + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. + */ + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { + break; + } + } else { + LZ4_wildCopy8(op, ip, cpy); /* can overwrite up to 8 bytes beyond cpy */ + ip += length; op = cpy; + } + + /* get offset */ + offset = LZ4_readLE16(ip); ip+=2; + match = op - offset; + + /* get matchlength */ + length = token & ML_MASK; + + _copy_match: + if (length == ML_MASK) { + size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0); + if (addl == rvl_error) { goto _output_error; } + length += addl; + if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ + } + length += MINMATCH; + +#if LZ4_FAST_DEC_LOOP + safe_match_copy: +#endif + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */ + /* match starting within external dictionary */ + if ((dict==usingExtDict) && (match < lowPrefix)) { + assert(dictEnd != NULL); + if (unlikely(op+length > oend-LASTLITERALS)) { + if (partialDecoding) length = MIN(length, (size_t)(oend-op)); + else goto _output_error; /* doesn't respect parsing restriction */ + } + + if (length <= (size_t)(lowPrefix-match)) { + /* match fits entirely within external dictionary : just copy */ + LZ4_memmove(op, dictEnd - (lowPrefix-match), length); + op += length; + } else { + /* match stretches into both external dictionary and current block */ + size_t const copySize = (size_t)(lowPrefix - match); + size_t const restSize = length - copySize; + LZ4_memcpy(op, dictEnd - copySize, copySize); + op += copySize; + if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ + BYTE* const endOfMatch = op + restSize; + const BYTE* copyFrom = lowPrefix; + while (op < endOfMatch) *op++ = *copyFrom++; + } else { + LZ4_memcpy(op, lowPrefix, restSize); + op += restSize; + } } + continue; + } + assert(match >= lowPrefix); + + /* copy match within block */ + cpy = op + length; + + /* partialDecoding : may end anywhere within the block */ + assert(op<=oend); + if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + size_t 
const mlen = MIN(length, (size_t)(oend-op)); + const BYTE* const matchEnd = match + mlen; + BYTE* const copyEnd = op + mlen; + if (matchEnd > op) { /* overlap copy */ + while (op < copyEnd) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, mlen); + } + op = copyEnd; + if (op == oend) { break; } + continue; + } + + if (unlikely(offset<8)) { + LZ4_write32(op, 0); /* silence msan warning when offset==0 */ + op[0] = match[0]; + op[1] = match[1]; + op[2] = match[2]; + op[3] = match[3]; + match += inc32table[offset]; + LZ4_memcpy(op+4, match, 4); + match -= dec64table[offset]; + } else { + LZ4_memcpy(op, match, 8); + match += 8; + } + op += 8; + + if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) { + BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1); + if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */ + if (op < oCopyLimit) { + LZ4_wildCopy8(op, match, oCopyLimit); + match += oCopyLimit - op; + op = oCopyLimit; + } + while (op < cpy) { *op++ = *match++; } + } else { + LZ4_memcpy(op, match, 8); + if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } + } + op = cpy; /* wildcopy correction */ + } + + /* end of decoding */ + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); + return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ + + /* Overflow error detected */ + _output_error: + return (int) (-(((const char*)ip)-src))-1; + } +} + + +/*===== Instantiate the API decoding functions. =====*/ + +LZ4_FORCE_O2 +int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, + decode_full_block, noDict, + (BYTE*)dest, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity, + partial_decode, + noDict, (BYTE*)dst, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_fast(const char* source, char* dest, int originalSize) +{ + DEBUGLOG(5, "LZ4_decompress_fast"); + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 0, NULL, 0); +} + +/*===== Instantiate a few more decoding cases, used more than once. =====*/ + +LZ4_FORCE_O2 /* Exported, an obsolete API function. */ +int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, withPrefix64k, + (BYTE*)dest - 64 KB, NULL, 0); +} + +/* Another obsolete API function, paired with the previous one. 
*/ +int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize) +{ + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 64 KB, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, + size_t prefixSize) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, noDict, + (BYTE*)dest-prefixSize, NULL, 0); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, + int compressedSize, int maxOutputSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest, + int compressedSize, int targetOutputSize, int dstCapacity, + const void* dictStart, size_t dictSize) +{ + dstCapacity = MIN(targetOutputSize, dstCapacity); + return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity, + partial_decode, usingExtDict, + (BYTE*)dest, (const BYTE*)dictStart, dictSize); +} + +LZ4_FORCE_O2 +static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, + const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + 0, (const BYTE*)dictStart, dictSize); +} + +/* The "double dictionary" mode, for use with e.g. ring buffers: the first part + * of the dictionary is passed as prefix, and the second via dictStart + dictSize. + * These routines are used only once, in LZ4_decompress_*_continue(). + */ +LZ4_FORCE_INLINE +int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize, + size_t prefixSize, const void* dictStart, size_t dictSize) +{ + return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, + decode_full_block, usingExtDict, + (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize); +} + +/*===== streaming decompression functions =====*/ + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +LZ4_streamDecode_t* LZ4_createStreamDecode(void) +{ + LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal)); + return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t)); +} + +int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream) +{ + if (LZ4_stream == NULL) { return 0; } /* support free on NULL */ + FREEMEM(LZ4_stream); + return 0; +} +#endif + +/*! LZ4_setStreamDecode() : + * Use this function to instruct where to find the dictionary. + * This function is not necessary if previous data is still available where it was decoded. + * Loading a size of 0 is allowed (same effect as no dictionary). 
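+ * Editor's sketch of a typical round (illustrative names, not upstream doc) :
+ *     LZ4_streamDecode_t sd;
+ *     LZ4_setStreamDecode(&sd, savedDict, savedSize);   // previously decoded bytes
+ *     LZ4_decompress_safe_continue(&sd, src, dst, srcSize, dstCapacity);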
+ * @return : 1 if OK, 0 if error + */ +int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + lz4sd->prefixSize = (size_t)dictSize; + if (dictSize) { + assert(dictionary != NULL); + lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize; + } else { + lz4sd->prefixEnd = (const BYTE*) dictionary; + } + lz4sd->externalDict = NULL; + lz4sd->extDictSize = 0; + return 1; +} + +/*! LZ4_decoderRingBufferSize() : + * when setting a ring buffer for streaming decompression (optional scenario), + * provides the minimum size of this ring buffer + * to be compatible with any source respecting maxBlockSize condition. + * Note : in a ring buffer scenario, + * blocks are presumed decompressed next to each other. + * When not enough space remains for next block (remainingSize < maxBlockSize), + * decoding resumes from beginning of ring buffer. + * @return : minimum ring buffer size, + * or 0 if there is an error (invalid maxBlockSize). + */ +int LZ4_decoderRingBufferSize(int maxBlockSize) +{ + if (maxBlockSize < 0) return 0; + if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0; + if (maxBlockSize < 16) maxBlockSize = 16; + return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize); +} + +/* +*_continue() : + These decoding functions allow decompression of multiple blocks in "streaming" mode. + Previously decoded blocks must still be available at the memory position where they were decoded. + If it's not possible, save the relevant part of decoded data into a safe buffer, + and indicate where it stands using LZ4_setStreamDecode() +*/ +LZ4_FORCE_O2 +int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) +{ + LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; + int result; + + if (lz4sd->prefixSize == 0) { + /* The first call, no dictionary yet. */ + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + /* They're rolling the current segment. */ + if (lz4sd->prefixSize >= 64 KB - 1) + result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + else if (lz4sd->extDictSize == 0) + result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize); + else + result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize, + lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)result; + lz4sd->prefixEnd += result; + } else { + /* The buffer wraps around, or they're switching to another buffer. 
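+ * Either way, the previously decoded bytes are no longer a direct prefix
+ * of dest, so they are re-registered below as an external dictionary
+ * (editor's note).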
*/ + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)result; + lz4sd->prefixEnd = (BYTE*)dest + result; + } + + return result; +} + +LZ4_FORCE_O2 int +LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, + const char* source, char* dest, int originalSize) +{ + LZ4_streamDecode_t_internal* const lz4sd = + (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse); + int result; + + DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize); + assert(originalSize >= 0); + + if (lz4sd->prefixSize == 0) { + DEBUGLOG(5, "first invocation : no prefix nor extDict"); + assert(lz4sd->extDictSize == 0); + result = LZ4_decompress_fast(source, dest, originalSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } else if (lz4sd->prefixEnd == (BYTE*)dest) { + DEBUGLOG(5, "continue using existing prefix"); + result = LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + lz4sd->prefixSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize += (size_t)originalSize; + lz4sd->prefixEnd += originalSize; + } else { + DEBUGLOG(5, "prefix becomes extDict"); + lz4sd->extDictSize = lz4sd->prefixSize; + lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize; + result = LZ4_decompress_fast_extDict(source, dest, originalSize, + lz4sd->externalDict, lz4sd->extDictSize); + if (result <= 0) return result; + lz4sd->prefixSize = (size_t)originalSize; + lz4sd->prefixEnd = (BYTE*)dest + originalSize; + } + + return result; +} + + +/* +Advanced decoding functions : +*_usingDict() : + These decoding functions work the same as "_continue" ones, + the dictionary must be explicitly provided within parameters +*/ + +int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize); +} + +int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize) +{ + if (dictSize==0) + return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity); + if (dictStart+dictSize == dest) { + if (dictSize >= 64 KB - 1) { + return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize); + } + assert(dictSize >= 0); + return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize); +} + 
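+/* Editor's sketch (illustrative, not upstream code) : driving the _usingDict()
+ * entry points above; buffer names are hypothetical :
+ *
+ *     char dict[64 * 1024];   // filled with previously transmitted bytes
+ *     int n = LZ4_decompress_safe_usingDict(src, dst, srcSize, dstCapacity,
+ *                                           dict, (int)sizeof(dict));
+ *     // n >= 0 : decompressed size ; n < 0 : malformed input
+ */
+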
+int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize) +{ + if (dictSize==0 || dictStart+dictSize == dest) + return LZ4_decompress_unsafe_generic( + (const BYTE*)source, (BYTE*)dest, originalSize, + (size_t)dictSize, NULL, 0); + assert(dictSize >= 0); + return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize); +} + + +/*=************************************************* +* Obsolete Functions +***************************************************/ +/* obsolete compression functions */ +int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) +{ + return LZ4_compress_default(source, dest, inputSize, maxOutputSize); +} +int LZ4_compress(const char* src, char* dest, int srcSize) +{ + return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize)); +} +int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1); +} +int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize) +{ + return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1); +} +int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity) +{ + return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1); +} +int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize) +{ + return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1); +} + +/* +These decompression functions are deprecated and should no longer be used. +They are only provided here for compatibility with older user programs. +- LZ4_uncompress is totally equivalent to LZ4_decompress_fast +- LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe +*/ +int LZ4_uncompress (const char* source, char* dest, int outputSize) +{ + return LZ4_decompress_fast(source, dest, outputSize); +} +int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) +{ + return LZ4_decompress_safe(source, dest, isize, maxOutputSize); +} + +/* Obsolete Streaming functions */ + +int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); } + +int LZ4_resetStreamState(void* state, char* inputBuffer) +{ + (void)inputBuffer; + LZ4_resetStream((LZ4_stream_t*)state); + return 0; +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +void* LZ4_create (char* inputBuffer) +{ + (void)inputBuffer; + return LZ4_createStream(); +} +#endif + +char* LZ4_slideInputBuffer (void* state) +{ + /* avoid const char * -> char * conversion warning */ + return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary; +} + +#endif /* LZ4_COMMONDEFS_ONLY */ diff --git a/c-blosc/internal-complibs/lz4-1.9.4/lz4.h b/c-blosc/internal-complibs/lz4-1.9.4/lz4.h new file mode 100644 index 0000000..491c608 --- /dev/null +++ b/c-blosc/internal-complibs/lz4-1.9.4/lz4.h @@ -0,0 +1,842 @@ +/* + * LZ4 - Fast LZ compression algorithm + * Header File + * Copyright (C) 2011-2020, Yann Collet. 
+
+  BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+
+      * Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+      * Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the following disclaimer
+  in the documentation and/or other materials provided with the
+  distribution.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  You can contact the author at :
+   - LZ4 homepage : http://www.lz4.org
+   - LZ4 source repository : https://github.com/lz4/lz4
+*/
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef LZ4_H_2983827168210
+#define LZ4_H_2983827168210
+
+/* --- Dependency --- */
+#include <stddef.h>   /* size_t */
+
+
+/**
+  Introduction
+
+  LZ4 is a lossless compression algorithm, providing compression speed >500 MB/s per core,
+  scalable with multi-core CPUs. It features an extremely fast decoder, with speed in
+  multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
+
+  The LZ4 compression library provides in-memory compression and decompression functions.
+  It gives full buffer control to the user.
+  Compression can be done in:
+    - a single step (described as Simple Functions)
+    - a single step, reusing a context (described in Advanced Functions)
+    - unbounded multiple steps (described as Streaming compression)
+
+  lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
+  Decompressing such a compressed block requires additional metadata.
+  The exact metadata depends on the exact decompression function.
+  For the typical case of LZ4_decompress_safe(),
+  metadata includes the block's compressed size, and a maximum bound of the decompressed size.
+  Each application is free to encode and pass such metadata in whichever way it wants.
+
+  lz4.h only handles blocks; it cannot generate Frames.
+
+  Blocks are different from Frames (doc/lz4_Frame_format.md).
+  Frames bundle both blocks and metadata in a specified manner.
+  Embedding metadata is required for compressed data to be self-contained and portable.
+  Frame format is delivered through a companion API, declared in lz4frame.h.
+  The `lz4` CLI can only manage frames.
+*/
+
+/*^***************************************************************
+*  Export parameters
+*****************************************************************/
+/*
+*  LZ4_DLL_EXPORT :
+*  Enable exporting of functions when building a Windows DLL
+*  LZ4LIB_VISIBILITY :
+*  Control library symbols visibility.
+*/
+#ifndef LZ4LIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define LZ4LIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define LZ4LIB_VISIBILITY
+#  endif
+#endif
+#if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1)
+#  define LZ4LIB_API __declspec(dllexport) LZ4LIB_VISIBILITY
+#elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1)
+#  define LZ4LIB_API __declspec(dllimport) LZ4LIB_VISIBILITY /* It isn't required but allows generating better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define LZ4LIB_API LZ4LIB_VISIBILITY
+#endif
+
+/*! LZ4_FREESTANDING :
+ *  When this macro is set to 1, it enables "freestanding mode" that is
+ *  suitable for typical freestanding environments which don't support the
+ *  standard C library.
+ *
+ *  - LZ4_FREESTANDING is a compile-time switch.
+ *  - It requires the following macros to be defined:
+ *    LZ4_memcpy, LZ4_memmove, LZ4_memset.
+ *  - It only enables LZ4/HC functions which don't use the heap.
+ *    None of the LZ4F_* functions are supported.
+ *  - See tests/freestanding.c to check its basic setup.
+ */
+#if defined(LZ4_FREESTANDING) && (LZ4_FREESTANDING == 1)
+#  define LZ4_HEAPMODE 0
+#  define LZ4HC_HEAPMODE 0
+#  define LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION 1
+#  if !defined(LZ4_memcpy)
+#    error "LZ4_FREESTANDING requires macro 'LZ4_memcpy'."
+#  endif
+#  if !defined(LZ4_memset)
+#    error "LZ4_FREESTANDING requires macro 'LZ4_memset'."
+#  endif
+#  if !defined(LZ4_memmove)
+#    error "LZ4_FREESTANDING requires macro 'LZ4_memmove'."
+#  endif
+#elif ! defined(LZ4_FREESTANDING)
+#  define LZ4_FREESTANDING 0
+#endif
+
+
+/*------   Version   ------*/
+#define LZ4_VERSION_MAJOR    1    /* for breaking interface changes  */
+#define LZ4_VERSION_MINOR    9    /* for new (non-breaking) interface capabilities */
+#define LZ4_VERSION_RELEASE  4    /* for tweaks, bug-fixes, or development */
+
+#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
+
+#define LZ4_LIB_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
+#define LZ4_QUOTE(str) #str
+#define LZ4_EXPAND_AND_QUOTE(str) LZ4_QUOTE(str)
+#define LZ4_VERSION_STRING LZ4_EXPAND_AND_QUOTE(LZ4_LIB_VERSION)   /* requires v1.7.3+ */
+
+LZ4LIB_API int LZ4_versionNumber (void);  /**< library version number; useful to check dll version; requires v1.3.0+ */
+LZ4LIB_API const char* LZ4_versionString (void);   /**< library version string; useful to check dll version; requires v1.7.5+ */
+
+
+/*-************************************
+*  Tuning parameter
+**************************************/
+#define LZ4_MEMORY_USAGE_MIN 10
+#define LZ4_MEMORY_USAGE_DEFAULT 14
+#define LZ4_MEMORY_USAGE_MAX 20
+
+/*!
+ * LZ4_MEMORY_USAGE :
+ *  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; )
+ *  Increasing memory usage improves compression ratio, at the cost of speed.
+ *  Reduced memory usage may improve speed at the cost of ratio, thanks to better cache locality.
+ *  Default value is 14, for 16KB, which nicely fits into the Intel x86 L1 cache
+ */
+#ifndef LZ4_MEMORY_USAGE
+# define LZ4_MEMORY_USAGE LZ4_MEMORY_USAGE_DEFAULT
+#endif
+
+#if (LZ4_MEMORY_USAGE < LZ4_MEMORY_USAGE_MIN)
+#  error "LZ4_MEMORY_USAGE is too small !"
+#endif
+
+#if (LZ4_MEMORY_USAGE > LZ4_MEMORY_USAGE_MAX)
+#  error "LZ4_MEMORY_USAGE is too large !"
+#endif
+
+/*-************************************
+*  Simple Functions
+**************************************/
+/*!
LZ4_compress_default() :
+ *  Compresses 'srcSize' bytes from buffer 'src'
+ *  into already allocated 'dst' buffer of size 'dstCapacity'.
+ *  Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
+ *  It also runs faster, so it's a recommended setting.
+ *  If the function cannot compress 'src' into a more limited 'dst' budget,
+ *  compression stops *immediately*, and the function result is zero.
+ *  In which case, 'dst' content is undefined (invalid).
+ *      srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
+ *      dstCapacity : size of buffer 'dst' (which must be already allocated)
+ *     @return  : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
+ *                or 0 if compression fails
+ *  Note : This function is protected against buffer overflow scenarios (it never writes outside the 'dst' buffer, nor reads outside the 'source' buffer).
+ */
+LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
+
+/*! LZ4_decompress_safe() :
+ *  compressedSize : is the exact complete size of the compressed block.
+ *  dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
+ * @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
+ *           If destination buffer is not large enough, decoding will stop and output an error code (negative value).
+ *           If the source stream is detected malformed, the function will stop decoding and return a negative result.
+ *  Note 1 : This function is protected against malicious data packets :
+ *           it never writes outside the 'dst' buffer, nor reads outside the 'source' buffer,
+ *           even if the compressed block is maliciously modified to order the decoder to do these actions.
+ *           In such a case, the decoder stops immediately, and considers the compressed block malformed.
+ *  Note 2 : compressedSize and dstCapacity must be provided to the function; the compressed block does not contain them.
+ *           The implementation is free to send / store / derive this information in whichever way is most beneficial.
+ *           If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
+ */
+LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
+
+
+/*-************************************
+*  Advanced Functions
+**************************************/
+#define LZ4_MAX_INPUT_SIZE        0x7E000000   /* 2 113 929 216 bytes */
+#define LZ4_COMPRESSBOUND(isize)  ((unsigned)(isize) > (unsigned)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16)
+
+/*! LZ4_compressBound() :
+    Provides the maximum size that LZ4 compression may output in a "worst case" scenario (input data not compressible)
+    This function is primarily useful for memory allocation purposes (destination buffer size).
+    Macro LZ4_COMPRESSBOUND() is also provided for compilation-time evaluation (stack memory allocation for example).
+    Note that LZ4_compress_default() compresses faster when dstCapacity is >= LZ4_compressBound(srcSize)
+        inputSize  : max supported value is LZ4_MAX_INPUT_SIZE
+        return : maximum output size in a "worst case" scenario
+              or 0, if input size is incorrect (too large or negative)
+*/
+LZ4LIB_API int LZ4_compressBound(int inputSize);
+
+/*! LZ4_compress_fast() :
+    Same as LZ4_compress_default(), but allows selection of "acceleration" factor.
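+    (An editor's usage sketch of the simple functions above follows; this
+     documentation block resumes after it.)
+*/
+
+/* Editor's illustrative sketch, not part of upstream lz4.h : a minimal
+ * round-trip with the simple API documented above. Guarded by '#if 0' so
+ * the header compiles exactly as before; assumes a hosted environment. */
+#if 0
+#include <string.h>
+#include "lz4.h"
+
+static int example_roundtrip(void)
+{
+    const char src[] = "Repetitive data compresses well. Repetitive data compresses well.";
+    const int srcSize = (int)sizeof(src);
+    char compressed[LZ4_COMPRESSBOUND(sizeof(src))];  /* worst-case size : compression cannot fail */
+    char restored[sizeof(src)];
+
+    const int cSize = LZ4_compress_default(src, compressed, srcSize, (int)sizeof(compressed));
+    if (cSize <= 0) return 1;                 /* 0 means error */
+
+    /* the exact compressed size must be stored or transmitted out of band */
+    const int dSize = LZ4_decompress_safe(compressed, restored, cSize, (int)sizeof(restored));
+    if (dSize != srcSize) return 1;           /* negative value means malformed input */
+
+    return memcmp(src, restored, (size_t)srcSize) != 0;
+}
+#endif
+
+/*! LZ4_compress_fast(), documentation continued :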
+    The larger the acceleration value, the faster the algorithm, but also the lower the compression ratio.
+    It's a trade-off. It can be fine tuned, with each successive value adding roughly +3% to speed.
+    An acceleration value of "1" is the same as regular LZ4_compress_default()
+    Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+    Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
+*/
+LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_fast_extState() :
+ *  Same as LZ4_compress_fast(), using an externally allocated memory space for its state.
+ *  Use LZ4_sizeofState() to know how much memory must be allocated,
+ *  and allocate it on 8-bytes boundaries (using `malloc()` typically).
+ *  Then, provide this buffer as `void* state` to compression function.
+ */
+LZ4LIB_API int LZ4_sizeofState(void);
+LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+
+/*! LZ4_compress_destSize() :
+ *  Reverse the logic : compresses as much data as possible from 'src' buffer
+ *  into already allocated buffer 'dst', of size >= 'targetDstSize'.
+ *  This function either compresses the entire 'src' content into 'dst' if it's large enough,
+ *  or fills 'dst' buffer completely with as much data as possible from 'src'.
+ *  note: acceleration parameter is fixed to "default".
+ *
+ * *srcSizePtr : will be modified to indicate how many bytes were read from 'src' to fill 'dst'.
+ *               New value is necessarily <= input value.
+ * @return : Nb bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ *
+ * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ *        the produced compressed content could, in specific circumstances,
+ *        require to be decompressed into a destination buffer larger
+ *        by at least 1 byte than the content to decompress.
+ *        If an application uses `LZ4_compress_destSize()`,
+ *        it's highly recommended to update liblz4 to v1.9.2 or better.
+ *        If this can't be done or ensured,
+ *        the receiving decompression function should provide
+ *        a dstCapacity which is > decompressedSize, by at least 1 byte.
+ *        See https://github.com/lz4/lz4/issues/859 for details
+ */
+LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize);
+
+
+/*! LZ4_decompress_safe_partial() :
+ *  Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
+ *  into destination buffer 'dst' of size 'dstCapacity'.
+ *  Up to 'targetOutputSize' bytes will be decoded.
+ *  The function stops decoding on reaching this objective.
+ *  This can be useful to boost performance
+ *  whenever only the beginning of a block is required.
+ *
+ * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
+ *           If source stream is detected malformed, function returns a negative result.
+ *
+ *  Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
+ *
+ *  Note 2 : targetOutputSize must be <= dstCapacity
+ *
+ *  Note 3 : this function effectively stops decoding on reaching targetOutputSize,
+ *           so dstCapacity is kind of redundant.
+ *           This is because in older versions of this function,
+ *           decoding operation would still write complete sequences.
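+ *           (Note 3 continues after the following editor's sketch.)
+ */
+
+/* Editor's illustrative sketch, not part of upstream lz4.h : using the
+ * external-state variant documented above. LZ4_sizeofState() reports how
+ * much memory the state needs; malloc() provides suitable alignment. */
+#if 0
+#include <stdlib.h>
+#include "lz4.h"
+
+static int example_extState(const char* src, int srcSize, char* dst, int dstCapacity)
+{
+    void* const state = malloc((size_t)LZ4_sizeofState());
+    int cSize = 0;
+    if (state != NULL) {
+        /* acceleration 1 gives the same trade-off as LZ4_compress_default() */
+        cSize = LZ4_compress_fast_extState(state, src, dst, srcSize, dstCapacity, 1);
+        free(state);
+    }
+    return cSize;   /* 0 : error, e.g. 'dst' too small */
+}
+#endif
+
+/*! LZ4_decompress_safe_partial(), notes continued :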
+ *           Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
+ *           it could write more bytes, though only up to dstCapacity.
+ *           Some "margin" used to be required for this operation to work properly.
+ *           Thankfully, this is no longer necessary.
+ *           The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
+ *
+ *  Note 4 : If srcSize is the exact size of the block,
+ *           then targetOutputSize can be any value,
+ *           including larger than the block's decompressed size.
+ *           The function will, at most, generate the block's decompressed size.
+ *
+ *  Note 5 : If srcSize is _larger_ than the block's compressed size,
+ *           then targetOutputSize **MUST** be <= the block's decompressed size.
+ *           Otherwise, *silent corruption will occur*.
+ */
+LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
+
+
+/*-*********************************************
+*  Streaming Compression Functions
+***********************************************/
+typedef union LZ4_stream_u LZ4_stream_t;  /* incomplete type (defined later) */
+
+/**
+  Note about RC_INVOKED
+
+  - RC_INVOKED is a predefined symbol of rc.exe (the resource compiler which is part of MSVC/Visual Studio).
+    https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros
+
+  - Since rc.exe is a legacy compiler, it truncates long symbols (> 30 chars)
+    and reports warning "RC4011: identifier truncated".
+
+  - To eliminate the warning, we surround long preprocessor symbols with an
+    "#if !defined(RC_INVOKED) ... #endif" block that means
+    "skip this block when rc.exe is trying to read it".
+*/
+#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4LIB_API LZ4_stream_t* LZ4_createStream(void);
+LZ4LIB_API int           LZ4_freeStream (LZ4_stream_t* streamPtr);
+#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
+#endif
+
+/*! LZ4_resetStream_fast() : v1.9.0+
+ *  Use this to prepare an LZ4_stream_t for a new chain of dependent blocks
+ *  (e.g., LZ4_compress_fast_continue()).
+ *
+ *  An LZ4_stream_t must be initialized once before usage.
+ *  This is automatically done when created by LZ4_createStream().
+ *  However, should the LZ4_stream_t be simply declared on stack (for example),
+ *  it's necessary to initialize it first, using LZ4_initStream().
+ *
+ *  After init, start any new stream with LZ4_resetStream_fast().
+ *  The same LZ4_stream_t can be re-used multiple times consecutively
+ *  and compress multiple streams,
+ *  provided that it starts each new stream with LZ4_resetStream_fast().
+ *
+ *  LZ4_resetStream_fast() is much faster than LZ4_initStream(),
+ *  but is not compatible with memory regions containing garbage data.
+ *
+ *  Note: it's only useful to call LZ4_resetStream_fast()
+ *        in the context of streaming compression.
+ *        The *extState* functions perform their own resets.
+ *        Invoking LZ4_resetStream_fast() before is redundant, and even counterproductive.
+ */
+LZ4LIB_API void LZ4_resetStream_fast (LZ4_stream_t* streamPtr);
+
+/*! LZ4_loadDict() :
+ *  Use this function to reference a static dictionary into LZ4_stream_t.
+ *  The dictionary must remain available during compression.
+ *  LZ4_loadDict() triggers a reset, so any previous data will be forgotten.
+ *  The same dictionary will have to be loaded on decompression side for successful decoding.
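+ *  (LZ4_loadDict() notes continue after the following editor's sketch.)
+ */
+
+/* Editor's illustrative sketch, not part of upstream lz4.h : the
+ * init-once / reset-per-stream lifecycle described for
+ * LZ4_resetStream_fast() above, with a stack-declared LZ4_stream_t.
+ * (LZ4_initStream() is declared further below in this header.) */
+#if 0
+#include "lz4.h"
+
+static void example_stream_lifecycle(void)
+{
+    LZ4_stream_t ctx;   /* declared on stack : must be initialized once */
+    LZ4_stream_t* const s = LZ4_initStream(&ctx, sizeof(ctx));
+    if (s == NULL) return;   /* size/alignment conditions not met */
+
+    /* ... first chain of dependent blocks, via LZ4_compress_fast_continue() ... */
+
+    LZ4_resetStream_fast(s);   /* cheap reset before a new, independent stream */
+
+    /* ... second chain of dependent blocks ... */
+}
+#endif
+
+/*! LZ4_loadDict(), continued :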
+ *  Dictionaries are useful for better compression of small data (KB range).
+ *  While LZ4 accepts any input as a dictionary,
+ *  results are generally better when using Zstandard's Dictionary Builder.
+ *  Loading a size of 0 is allowed, and is the same as a reset.
+ * @return : loaded dictionary size, in bytes (necessarily <= 64 KB)
+ */
+LZ4LIB_API int LZ4_loadDict (LZ4_stream_t* streamPtr, const char* dictionary, int dictSize);
+
+/*! LZ4_compress_fast_continue() :
+ *  Compress 'src' content using data from previously compressed blocks, for better compression ratio.
+ * 'dst' buffer must be already allocated.
+ *  If dstCapacity >= LZ4_compressBound(srcSize), compression is guaranteed to succeed, and runs faster.
+ *
+ * @return : size of compressed block
+ *           or 0 if there is an error (typically, cannot fit into 'dst').
+ *
+ *  Note 1 : Each invocation to LZ4_compress_fast_continue() generates a new block.
+ *           Each block has precise boundaries.
+ *           Each block must be decompressed separately, calling LZ4_decompress_*() with relevant metadata.
+ *           It's not possible to append blocks together and expect a single invocation of LZ4_decompress_*() to decompress them together.
+ *
+ *  Note 2 : The previous 64KB of source data is __assumed__ to remain present, unmodified, at the same address in memory!
+ *
+ *  Note 3 : When input is structured as a double-buffer, each buffer can have any size, including < 64 KB.
+ *           Make sure that buffers are separated by at least one byte.
+ *           This construction ensures that each block only depends on the previous block.
+ *
+ *  Note 4 : If input buffer is a ring-buffer, it can have any size, including < 64 KB.
+ *
+ *  Note 5 : After an error, the stream status is undefined (invalid); it can only be reset or freed.
+ */
+LZ4LIB_API int LZ4_compress_fast_continue (LZ4_stream_t* streamPtr, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration);
+
+/*! LZ4_saveDict() :
+ *  If the last 64KB of data cannot be guaranteed to remain available at its current memory location,
+ *  save it into a safer place (char* safeBuffer).
+ *  This is schematically equivalent to a memcpy() followed by LZ4_loadDict(),
+ *  but is much faster, because LZ4_saveDict() doesn't need to rebuild tables.
+ * @return : saved dictionary size in bytes (necessarily <= maxDictSize), or 0 if error.
+ */
+LZ4LIB_API int LZ4_saveDict (LZ4_stream_t* streamPtr, char* safeBuffer, int maxDictSize);
+
+
+/*-**********************************************
+*  Streaming Decompression Functions
+*  Bufferless synchronous API
+************************************************/
+typedef union LZ4_streamDecode_u LZ4_streamDecode_t;  /* tracking context */
+
+/*! LZ4_createStreamDecode() and LZ4_freeStreamDecode() :
+ *  creation / destruction of streaming decompression tracking context.
+ *  A tracking context can be re-used multiple times.
+ */
+#if !defined(RC_INVOKED) /* https://docs.microsoft.com/en-us/windows/win32/menurc/predefined-macros */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4LIB_API LZ4_streamDecode_t* LZ4_createStreamDecode(void);
+LZ4LIB_API int                 LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream);
+#endif /* !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) */
+#endif
+
+/*! LZ4_setStreamDecode() :
+ *  An LZ4_streamDecode_t context can be allocated once and re-used multiple times.
+ *  Use this function to start decompression of a new stream of blocks.
+ *  A dictionary can optionally be set. Use NULL or size 0 for a reset order.
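+ *  (LZ4_setStreamDecode() notes continue after the following editor's sketch.)
+ */
+
+/* Editor's illustrative sketch, not part of upstream lz4.h : compressing one
+ * chunk as a dependent block with the streaming API above. An optional
+ * dictionary primes the history; 'chunk' must stay valid and unmodified
+ * while later blocks may still reference it (see Note 2 above). */
+#if 0
+#include "lz4.h"
+
+static int example_stream_compress(const char* dict, int dictSize,
+                                   const char* chunk, int chunkSize,
+                                   char* dst, int dstCapacity)
+{
+    LZ4_stream_t* const s = LZ4_createStream();
+    int cSize = 0;
+    if (s == NULL) return 0;
+    LZ4_loadDict(s, dict, dictSize);   /* optional : pass NULL / 0 to skip */
+    cSize = LZ4_compress_fast_continue(s, chunk, dst, chunkSize, dstCapacity, 1);
+    LZ4_freeStream(s);
+    return cSize;   /* 0 : error (typically 'dst' too small) */
+}
+#endif
+
+/*! LZ4_setStreamDecode(), continued :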
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during the next decompression.
+ * @return : 1 if OK, 0 if error
+ */
+LZ4LIB_API int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize);
+
+/*! LZ4_decoderRingBufferSize() : v1.8.2+
+ *  Note : in a ring buffer scenario (optional),
+ *  blocks are presumed decompressed next to each other
+ *  up to the moment there is not enough remaining space for the next block (remainingSize < maxBlockSize),
+ *  at which stage decoding resumes from the beginning of the ring buffer.
+ *  When setting such a ring buffer for streaming decompression,
+ *  this function provides the minimum size of the ring buffer
+ *  to be compatible with any source respecting the maxBlockSize condition.
+ * @return : minimum ring buffer size,
+ *           or 0 if there is an error (invalid maxBlockSize).
+ */
+LZ4LIB_API int LZ4_decoderRingBufferSize(int maxBlockSize);
+#define LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize) (65536 + 14 + (maxBlockSize))  /* for static allocation; maxBlockSize presumed valid */
+
+/*! LZ4_decompress_*_continue() :
+ *  These decoding functions allow decompression of consecutive blocks in "streaming" mode.
+ *  A block is an unsplittable entity; it must be presented entirely to a decompression function.
+ *  Decompression functions only accept one block at a time.
+ *  The last 64KB of previously decoded data *must* remain available and unmodified at the memory position where they were decoded.
+ *  If less than 64KB of data has been decoded, all the data must be present.
+ *
+ *  Special : if decompression side sets a ring buffer, it must respect one of the following conditions :
+ *  - Decompression buffer size is _at least_ LZ4_decoderRingBufferSize(maxBlockSize).
+ *    maxBlockSize is the maximum size of any single block. It can have any value > 16 bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized.
+ *    Actually, data can be produced by any source compliant with LZ4 format specification, and respecting maxBlockSize.
+ *  - Synchronized mode :
+ *    Decompression buffer size is _exactly_ the same as compression buffer size,
+ *    and follows exactly the same update rule (block boundaries at same positions),
+ *    and decoding function is provided with the exact decompressed size of each block (exception for last block of the stream),
+ *    _then_ decoding & encoding ring buffers can have any size, including small ones ( < 64 KB).
+ *  - Decompression buffer is larger than encoding buffer, by a minimum of maxBlockSize more bytes.
+ *    In which case, encoding and decoding buffers do not need to be synchronized,
+ *    and encoding ring buffer can have any size, including small ones ( < 64 KB).
+ *
+ *  Whenever these conditions cannot be met,
+ *  save the last 64KB of decoded data into a safe buffer where it can't be modified during decompression,
+ *  then indicate where this data is saved using LZ4_setStreamDecode(), before decompressing the next block.
+*/
+LZ4LIB_API int
+LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode,
+                              const char* src, char* dst,
+                              int srcSize, int dstCapacity);
+
+
+/*! LZ4_decompress_*_usingDict() :
+ *  These decoding functions work the same as
+ *  a combination of LZ4_setStreamDecode() followed by LZ4_decompress_*_continue()
+ *  They are stand-alone, and don't need an LZ4_streamDecode_t structure.
+ *  Dictionary is presumed stable : it must remain accessible and unmodified during decompression.
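+ *  (This block continues after the following editor's sketch.)
+ */
+
+/* Editor's illustrative sketch, not part of upstream lz4.h : decoding one
+ * block of a stream with the tracking context documented above. The last
+ * 64KB of decoded data must remain in place between calls. */
+#if 0
+#include "lz4.h"
+
+static int example_stream_decompress(const char* dict, int dictSize,
+                                     const char* block, int blockSize,
+                                     char* dst, int dstCapacity)
+{
+    LZ4_streamDecode_t* const d = LZ4_createStreamDecode();
+    int dSize = -1;
+    if (d == NULL) return -1;
+    if (LZ4_setStreamDecode(d, dict, dictSize)) {   /* 1 : OK */
+        dSize = LZ4_decompress_safe_continue(d, block, dst, blockSize, dstCapacity);
+    }
+    LZ4_freeStreamDecode(d);
+    return dSize;   /* negative : malformed input or 'dst' too small */
+}
+#endif
+
+/*! LZ4_decompress_*_usingDict(), continued :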
+ * Performance tip : Decompression speed can be substantially increased + * when dst == dictStart + dictSize. + */ +LZ4LIB_API int +LZ4_decompress_safe_usingDict(const char* src, char* dst, + int srcSize, int dstCapacity, + const char* dictStart, int dictSize); + +LZ4LIB_API int +LZ4_decompress_safe_partial_usingDict(const char* src, char* dst, + int compressedSize, + int targetOutputSize, int maxOutputSize, + const char* dictStart, int dictSize); + +#endif /* LZ4_H_2983827168210 */ + + +/*^************************************* + * !!!!!! STATIC LINKING ONLY !!!!!! + ***************************************/ + +/*-**************************************************************************** + * Experimental section + * + * Symbols declared in this section must be considered unstable. Their + * signatures or semantics may change, or they may be removed altogether in the + * future. They are therefore only safe to depend on when the caller is + * statically linked against the library. + * + * To protect against unsafe usage, not only are the declarations guarded, + * the definitions are hidden by default + * when building LZ4 as a shared/dynamic library. + * + * In order to access these declarations, + * define LZ4_STATIC_LINKING_ONLY in your application + * before including LZ4's headers. + * + * In order to make their implementations accessible dynamically, you must + * define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library. + ******************************************************************************/ + +#ifdef LZ4_STATIC_LINKING_ONLY + +#ifndef LZ4_STATIC_3504398509 +#define LZ4_STATIC_3504398509 + +#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS +#define LZ4LIB_STATIC_API LZ4LIB_API +#else +#define LZ4LIB_STATIC_API +#endif + + +/*! LZ4_compress_fast_extState_fastReset() : + * A variant of LZ4_compress_fast_extState(). + * + * Using this variant avoids an expensive initialization step. + * It is only safe to call if the state buffer is known to be correctly initialized already + * (see above comment on LZ4_resetStream_fast() for a definition of "correctly initialized"). + * From a high level, the difference is that + * this function initializes the provided state with a call to something like LZ4_resetStream_fast() + * while LZ4_compress_fast_extState() starts with a call to LZ4_resetStream(). + */ +LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); + +/*! LZ4_attach_dictionary() : + * This is an experimental API that allows + * efficient use of a static dictionary many times. + * + * Rather than re-loading the dictionary buffer into a working context before + * each compression, or copying a pre-loaded dictionary's LZ4_stream_t into a + * working LZ4_stream_t, this function introduces a no-copy setup mechanism, + * in which the working stream references the dictionary stream in-place. + * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDict() should + * be expected to work. + * + * Alternatively, the provided dictionaryStream may be NULL, + * in which case any existing dictionary stream is unset. + * + * If a dictionary is provided, it replaces any pre-existing stream history. + * The dictionary contents are the only history that can be referenced and + * logically immediately precede the data compressed in the first subsequent + * compression call. 
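+ *  (This description continues after the following editor's sketch.)
+ */
+
+/* Editor's illustrative sketch, not part of upstream lz4.h : the stand-alone
+ * dictionary decoder declared above, equivalent to LZ4_setStreamDecode()
+ * followed by LZ4_decompress_safe_continue(), without a tracking context. */
+#if 0
+#include "lz4.h"
+
+static int example_usingDict(const char* src, int srcSize,
+                             char* dst, int dstCapacity,
+                             const char* dict, int dictSize)
+{
+    /* per the performance tip above : fastest when dst == dict + dictSize */
+    return LZ4_decompress_safe_usingDict(src, dst, srcSize, dstCapacity, dict, dictSize);
+}
+#endif
+
+/*! LZ4_attach_dictionary(), continued :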
+ *
+ *  The dictionary will only remain attached to the working stream through the
+ *  first compression call, at the end of which it is cleared. The dictionary
+ *  stream (and source buffer) must remain in-place / accessible / unchanged
+ *  through the completion of the first compression call on the stream.
+ */
+LZ4LIB_STATIC_API void
+LZ4_attach_dictionary(LZ4_stream_t* workingStream,
+                      const LZ4_stream_t* dictionaryStream);
+
+
+/*! In-place compression and decompression
+ *
+ *  It's possible to have input and output sharing the same buffer,
+ *  for highly constrained memory environments.
+ *  In both cases, it requires input to lie at the end of the buffer,
+ *  and decompression to start at beginning of the buffer.
+ *  Buffer size must feature some margin, hence be larger than final size.
+ *
+ *  |<------------------------buffer--------------------------------->|
+ *                             |<-----------compressed data--------->|
+ *  |<-----------decompressed size------------------>|
+ *                                                   |<----margin---->|
+ *
+ *  This technique is more useful for decompression,
+ *  since decompressed size is typically larger,
+ *  and margin is short.
+ *
+ *  In-place decompression will work inside any buffer
+ *  whose size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
+ *  This presumes that decompressedSize > compressedSize.
+ *  Otherwise, it means compression actually expanded data,
+ *  and it would be more efficient to store such data with a flag indicating it's not compressed.
+ *  This can happen when data is not compressible (already compressed, or encrypted).
+ *
+ *  For in-place compression, margin is larger, as it must be able to cope with both
+ *  history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
+ *  and data expansion, which can happen when input is not compressible.
+ *  As a consequence, buffer size requirements are much higher,
+ *  and memory savings offered by in-place compression are more limited.
+ *
+ *  There are ways to limit this cost for compression :
+ *  - Reduce history size, by modifying LZ4_DISTANCE_MAX.
+ *    Note that it is a compile-time constant, so all compressions will apply this limit.
+ *    Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
+ *    so it's a reasonable trick when inputs are known to be small.
+ *  - Require the compressor to deliver a "maximum compressed size".
+ *    This is the `dstCapacity` parameter in `LZ4_compress*()`.
+ *    When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
+ *    in which case, the return code will be 0 (zero).
+ *    The caller must be ready for these cases to happen,
+ *    and typically design a backup scheme to send data uncompressed.
+ *  The combination of both techniques can significantly reduce
+ *  the amount of margin required for in-place compression.
+ *
+ *  In-place compression can work in any buffer
+ *  whose size is >= (maxCompressedSize)
+ *  with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
+ *  LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
+ *  so it's possible to reduce memory requirements by playing with them.
+ */
+
+#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize)          (((compressedSize) >> 8) + 32)
+#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize)   ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize))  /**< note: presumes that compressedSize < decompressedSize.
note2: margin is overestimated a bit, since it could use compressedSize instead */
+
+#ifndef LZ4_DISTANCE_MAX   /* history window size; can be user-defined at compile time */
+#  define LZ4_DISTANCE_MAX 65535   /* set to maximum value by default */
+#endif
+
+#define LZ4_COMPRESS_INPLACE_MARGIN                           (LZ4_DISTANCE_MAX + 32)   /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
+#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize)   ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN)  /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+
+#endif   /* LZ4_STATIC_3504398509 */
+#endif   /* LZ4_STATIC_LINKING_ONLY */
+
+
+
+#ifndef LZ4_H_98237428734687
+#define LZ4_H_98237428734687
+
+/*-************************************************************
+ *  Private Definitions
+ **************************************************************
+ *  Do not use these definitions directly.
+ *  They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`.
+ *  Accessing members will expose user code to API and/or ABI break in future versions of the library.
+ **************************************************************/
+#define LZ4_HASHLOG       (LZ4_MEMORY_USAGE-2)
+#define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE)
+#define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG)       /* required as macro for static allocation */
+
+#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# include <stdint.h>
+  typedef  int8_t  LZ4_i8;
+  typedef uint8_t  LZ4_byte;
+  typedef uint16_t LZ4_u16;
+  typedef uint32_t LZ4_u32;
+#else
+  typedef   signed char  LZ4_i8;
+  typedef unsigned char  LZ4_byte;
+  typedef unsigned short LZ4_u16;
+  typedef unsigned int   LZ4_u32;
+#endif
+
+/*! LZ4_stream_t :
+ *  Never ever use below internal definitions directly !
+ *  These definitions are not API/ABI safe, and may change in future versions.
+ *  If you need static allocation, declare or allocate an LZ4_stream_t object.
+**/
+
+typedef struct LZ4_stream_t_internal LZ4_stream_t_internal;
+struct LZ4_stream_t_internal {
+    LZ4_u32 hashTable[LZ4_HASH_SIZE_U32];
+    const LZ4_byte* dictionary;
+    const LZ4_stream_t_internal* dictCtx;
+    LZ4_u32 currentOffset;
+    LZ4_u32 tableType;
+    LZ4_u32 dictSize;
+    /* Implicit padding to ensure structure is aligned */
+};
+
+#define LZ4_STREAM_MINSIZE  ((1UL << LZ4_MEMORY_USAGE) + 32)  /* static size, for inter-version compatibility */
+union LZ4_stream_u {
+    char minStateSize[LZ4_STREAM_MINSIZE];
+    LZ4_stream_t_internal internal_donotuse;
+}; /* previously typedef'd to LZ4_stream_t */
+
+
+/*! LZ4_initStream() : v1.9.0+
+ *  An LZ4_stream_t structure must be initialized at least once.
+ *  This is automatically done when invoking LZ4_createStream(),
+ *  but it's not when the structure is simply declared on stack (for example).
+ *
+ *  Use LZ4_initStream() to properly initialize a newly declared LZ4_stream_t.
+ *  It can also initialize any arbitrary buffer of sufficient size,
+ *  and will @return a pointer of proper type upon initialization.
+ *
+ *  Note : initialization fails if size and alignment conditions are not respected.
+ *         In which case, the function will @return NULL.
+ *  Note2: An LZ4_stream_t structure guarantees correct alignment and size.
+ *  Note3: Before v1.9.0, use LZ4_resetStream() instead
+**/
+LZ4LIB_API LZ4_stream_t* LZ4_initStream (void* buffer, size_t size);
+
+
+/*!
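 (Editor's note : a static-allocation sketch follows; the LZ4_streamDecode_t documentation resumes below.)
+ */
+
+/* Editor's illustrative sketch, not part of upstream lz4.h : LZ4_stream_t
+ * guarantees the size and alignment that LZ4_initStream() checks for, so
+ * initializing a statically allocated context cannot fail. */
+#if 0
+#include "lz4.h"
+
+static LZ4_stream_t g_ctx;   /* static storage : contents start zeroed */
+
+static LZ4_stream_t* example_init_static(void)
+{
+    return LZ4_initStream(&g_ctx, sizeof(g_ctx));   /* returns &g_ctx, ready for use */
+}
+#endif
+
+/*!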
LZ4_streamDecode_t : + * Never ever use below internal definitions directly ! + * These definitions are not API/ABI safe, and may change in future versions. + * If you need static allocation, declare or allocate an LZ4_streamDecode_t object. +**/ +typedef struct { + const LZ4_byte* externalDict; + const LZ4_byte* prefixEnd; + size_t extDictSize; + size_t prefixSize; +} LZ4_streamDecode_t_internal; + +#define LZ4_STREAMDECODE_MINSIZE 32 +union LZ4_streamDecode_u { + char minStateSize[LZ4_STREAMDECODE_MINSIZE]; + LZ4_streamDecode_t_internal internal_donotuse; +} ; /* previously typedef'd to LZ4_streamDecode_t */ + + + +/*-************************************ +* Obsolete Functions +**************************************/ + +/*! Deprecation warnings + * + * Deprecated functions make the compiler generate a warning when invoked. + * This is meant to invite users to update their source code. + * Should deprecation warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * + * Another method is to define LZ4_DISABLE_DEPRECATE_WARNINGS + * before including the header file. + */ +#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS +# define LZ4_DEPRECATED(message) /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define LZ4_DEPRECATED(message) [[deprecated(message)]] +# elif defined(_MSC_VER) +# define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) +# else +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ +# endif +#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ + +/*! Obsolete compression functions (since v1.7.3) */ +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); + +/*! 
Obsolete decompression functions (since v1.8.0) */ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); + +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! + * + * In order to perform streaming compression, these functions depended on data + * that is no longer tracked in the state. They have been preserved as well as + * possible: using them will still produce a correct output. However, they don't + * actually retain any history between compression calls. The compression ratio + * achieved will therefore be no better than compressing each chunk + * independently. + */ +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API void* LZ4_create (char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStreamState(void); +LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); +LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); + +/*! Obsolete streaming decoding functions (since v1.7.0) */ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); + +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : + * These functions used to be faster than LZ4_decompress_safe(), + * but this is no longer the case. They are now slower. + * This is because LZ4_decompress_fast() doesn't know the input size, + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. + * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. + * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. + * + * The last remaining LZ4_decompress_fast() specificity is that + * it can decompress a block without knowing its compressed size. + * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). + * + * Parameters: + * originalSize : is the uncompressed size to regenerate. + * `dst` must be already allocated, its size must be >= 'originalSize' bytes. + * @return : number of bytes read from source buffer (== compressed size). + * The function expects to finish at block's end exactly. + * If the source stream is detected malformed, the function stops decoding and returns a negative result. + * note : LZ4_decompress_fast*() requires originalSize. Thanks to this information, it never writes past the output buffer. + * However, since it doesn't know its 'src' size, it may read an unknown amount of input, past input buffer bounds. + * Also, since match offsets are not validated, match reads from 'src' may underflow too. + * These issues never happen if input (compressed) data is correct. + * But they may happen if input data is invalid (error or intentional tampering). + * As a consequence, use these functions in trusted environments with trusted data **only**. + */ +LZ4_DEPRECATED("This function is deprecated and unsafe. 
Consider using LZ4_decompress_safe() instead") +LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") +LZ4LIB_API int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") +LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); + +/*! LZ4_resetStream() : + * An LZ4_stream_t structure must be initialized at least once. + * This is done with LZ4_initStream(), or LZ4_resetStream(). + * Consider switching to LZ4_initStream(), + * invoking LZ4_resetStream() will trigger deprecation warnings in the future. + */ +LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr); + + +#endif /* LZ4_H_98237428734687 */ + + +#if defined (__cplusplus) +} +#endif diff --git a/c-blosc/internal-complibs/lz4-1.9.4/lz4hc.c b/c-blosc/internal-complibs/lz4-1.9.4/lz4hc.c new file mode 100644 index 0000000..b21ad6b --- /dev/null +++ b/c-blosc/internal-complibs/lz4-1.9.4/lz4hc.c @@ -0,0 +1,1631 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Copyright (C) 2011-2020, Yann Collet. + + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* note : lz4hc is not an independent module, it requires lz4.h/lz4.c for proper compilation */ + + +/* ************************************* +* Tuning Parameter +***************************************/ + +/*! HEAPMODE : + * Select how default compression function will allocate workplace memory, + * in stack (0:fastest), or in heap (1:requires malloc()). + * Since workplace is rather large, heap mode is recommended. 
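+ *  (An editor's usage sketch follows this note.)
+**/
+
+/* Editor's illustrative sketch, not part of upstream lz4hc.c : a one-shot HC
+ * compression call via the public API declared in lz4hc.h (names assumed
+ * from that header : LZ4_compress_HC(), LZ4HC_CLEVEL_DEFAULT). */
+#if 0
+#include "lz4hc.h"
+
+static int example_hc(const char* src, int srcSize, char* dst, int dstCapacity)
+{
+    /* levels range from LZ4HC_CLEVEL_MIN to LZ4HC_CLEVEL_MAX :
+     * higher levels = smaller output, slower compression */
+    return LZ4_compress_HC(src, dst, srcSize, dstCapacity, LZ4HC_CLEVEL_DEFAULT);
+}
+#endif
+
+/* (end of editor's sketch)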
+**/ +#ifndef LZ4HC_HEAPMODE +# define LZ4HC_HEAPMODE 1 +#endif + + +/*=== Dependency ===*/ +#define LZ4_HC_STATIC_LINKING_ONLY +#include "lz4hc.h" + + +/*=== Common definitions ===*/ +#if defined(__GNUC__) +# pragma GCC diagnostic ignored "-Wunused-function" +#endif +#if defined (__clang__) +# pragma clang diagnostic ignored "-Wunused-function" +#endif + +#define LZ4_COMMONDEFS_ONLY +#ifndef LZ4_SRC_INCLUDED +#include "lz4.c" /* LZ4_count, constants, mem */ +#endif + + +/*=== Enums ===*/ +typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; + + +/*=== Constants ===*/ +#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) +#define LZ4_OPT_NUM (1<<12) + + +/*=== Macros ===*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) +#define HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8)-LZ4HC_HASH_LOG)) +#define DELTANEXTMAXD(p) chainTable[(p) & LZ4HC_MAXD_MASK] /* flexible, LZ4HC_MAXD dependent */ +#define DELTANEXTU16(table, pos) table[(U16)(pos)] /* faster */ +/* Make fields passed to, and updated by LZ4HC_encodeSequence explicit */ +#define UPDATABLE(ip, op, anchor) &ip, &op, &anchor + +static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr)); } + + +/************************************** +* HC Compression +**************************************/ +static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) +{ + MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); +} + +static void LZ4HC_init_internal (LZ4HC_CCtx_internal* hc4, const BYTE* start) +{ + size_t const bufferSize = (size_t)(hc4->end - hc4->prefixStart); + size_t newStartingOffset = bufferSize + hc4->dictLimit; + assert(newStartingOffset >= bufferSize); /* check overflow */ + if (newStartingOffset > 1 GB) { + LZ4HC_clearTables(hc4); + newStartingOffset = 0; + } + newStartingOffset += 64 KB; + hc4->nextToUpdate = (U32)newStartingOffset; + hc4->prefixStart = start; + hc4->end = start; + hc4->dictStart = start; + hc4->dictLimit = (U32)newStartingOffset; + hc4->lowLimit = (U32)newStartingOffset; +} + + +/* Update chains up to ip (excluded) */ +LZ4_FORCE_INLINE void LZ4HC_Insert (LZ4HC_CCtx_internal* hc4, const BYTE* ip) +{ + U16* const chainTable = hc4->chainTable; + U32* const hashTable = hc4->hashTable; + const BYTE* const prefixPtr = hc4->prefixStart; + U32 const prefixIdx = hc4->dictLimit; + U32 const target = (U32)(ip - prefixPtr) + prefixIdx; + U32 idx = hc4->nextToUpdate; + assert(ip >= prefixPtr); + assert(target >= prefixIdx); + + while (idx < target) { + U32 const h = LZ4HC_hashPtr(prefixPtr+idx-prefixIdx); + size_t delta = idx - hashTable[h]; + if (delta>LZ4_DISTANCE_MAX) delta = LZ4_DISTANCE_MAX; + DELTANEXTU16(chainTable, idx) = (U16)delta; + hashTable[h] = idx; + idx++; + } + + hc4->nextToUpdate = target; +} + +/** LZ4HC_countBack() : + * @return : negative value, nb of common bytes before ip/match */ +LZ4_FORCE_INLINE +int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, + const BYTE* const iMin, const BYTE* const mMin) +{ + int back = 0; + int const min = (int)MAX(iMin - ip, mMin - match); + assert(min <= 0); + assert(ip >= iMin); assert((size_t)(ip-iMin) < (1U<<31)); + assert(match >= mMin); assert((size_t)(match - mMin) < (1U<<31)); + while ( (back > min) + && (ip[back-1] == match[back-1]) ) + back--; + return back; +} + +#if defined(_MSC_VER) +# define LZ4HC_rotl32(x,r) _rotl(x,r) +#else +# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif + + +static U32 
LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
+{
+    size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
+    if (bitsToRotate == 0) return pattern;
+    return LZ4HC_rotl32(pattern, (int)bitsToRotate);
+}
+
+/* LZ4HC_countPattern() :
+ * pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
+static unsigned
+LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32)
+{
+    const BYTE* const iStart = ip;
+    reg_t const pattern = (sizeof(pattern)==8) ?
+        (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32;
+
+    while (likely(ip < iEnd-(sizeof(pattern)-1))) {
+        reg_t const diff = LZ4_read_ARCH(ip) ^ pattern;
+        if (!diff) { ip+=sizeof(pattern); continue; }
+        ip += LZ4_NbCommonBytes(diff);
+        return (unsigned)(ip - iStart);
+    }
+
+    if (LZ4_isLittleEndian()) {
+        reg_t patternByte = pattern;
+        while ((ip<iEnd) && (*ip == (BYTE)patternByte)) {
+            ip++; patternByte >>= 8;
+        }
+    } else {  /* big endian */
+        U32 bitOffset = (sizeof(pattern)*8) - 8;
+        while (ip < iEnd) {
+            BYTE const byte = (BYTE)(pattern >> bitOffset);
+            if (*ip != byte) break;
+            ip ++; bitOffset -= 8;
+    }   }
+
+    return (unsigned)(ip - iStart);
+}
+
+/* LZ4HC_reverseCountPattern() :
+ * pattern must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!)
+ * read using natural platform endianness */
+static unsigned
+LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
+{
+    const BYTE* const iStart = ip;
+
+    while (likely(ip >= iLow+4)) {
+        if (LZ4_read32(ip-4) != pattern) break;
+        ip -= 4;
+    }
+    {   const BYTE* bytePtr = (const BYTE*)(&pattern) + 3; /* works for any endianness */
+        while (likely(ip>iLow)) {
+            if (ip[-1] != *bytePtr) break;
+            ip--; bytePtr--;
+    }   }
+    return (unsigned)(iStart - ip);
+}
+
+/* LZ4HC_protectDictEnd() :
+ * Checks if the match is in the last 3 bytes of the dictionary, so reading the
+ * 4 byte MINMATCH would overflow.
+ * @returns true if the match index is okay.
+ */
+static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
+{
+    return ((U32)((dictLimit - 1) - matchIndex) >= 3);
+}
+
+typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
+typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
+
+LZ4_FORCE_INLINE int
+LZ4HC_InsertAndGetWiderMatch (
+        LZ4HC_CCtx_internal* const hc4,
+        const BYTE* const ip,
+        const BYTE* const iLowLimit, const BYTE* const iHighLimit,
+        int longest,
+        const BYTE** matchpos,
+        const BYTE** startpos,
+        const int maxNbAttempts,
+        const int patternAnalysis, const int chainSwap,
+        const dictCtx_directive dict,
+        const HCfavor_e favorDecSpeed)
+{
+    U16* const chainTable = hc4->chainTable;
+    U32* const HashTable = hc4->hashTable;
+    const LZ4HC_CCtx_internal * const dictCtx = hc4->dictCtx;
+    const BYTE* const prefixPtr = hc4->prefixStart;
+    const U32 prefixIdx = hc4->dictLimit;
+    const U32 ipIndex = (U32)(ip - prefixPtr) + prefixIdx;
+    const int withinStartDistance = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex);
+    const U32 lowestMatchIndex = (withinStartDistance) ?
hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX; + const BYTE* const dictStart = hc4->dictStart; + const U32 dictIdx = hc4->lowLimit; + const BYTE* const dictEnd = dictStart + prefixIdx - dictIdx; + int const lookBackLength = (int)(ip-iLowLimit); + int nbAttempts = maxNbAttempts; + U32 matchChainPos = 0; + U32 const pattern = LZ4_read32(ip); + U32 matchIndex; + repeat_state_e repeat = rep_untested; + size_t srcPatternLength = 0; + + DEBUGLOG(7, "LZ4HC_InsertAndGetWiderMatch"); + /* First Match */ + LZ4HC_Insert(hc4, ip); + matchIndex = HashTable[LZ4HC_hashPtr(ip)]; + DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", + matchIndex, lowestMatchIndex); + + while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { + int matchLength=0; + nbAttempts--; + assert(matchIndex < ipIndex); + if (favorDecSpeed && (ipIndex - matchIndex < 8)) { + /* do nothing */ + } else if (matchIndex >= prefixIdx) { /* within current Prefix */ + const BYTE* const matchPtr = prefixPtr + matchIndex - prefixIdx; + assert(matchPtr < ip); + assert(longest >= 1); + if (LZ4_read16(iLowLimit + longest - 1) == LZ4_read16(matchPtr - lookBackLength + longest - 1)) { + if (LZ4_read32(matchPtr) == pattern) { + int const back = lookBackLength ? LZ4HC_countBack(ip, matchPtr, iLowLimit, prefixPtr) : 0; + matchLength = MINMATCH + (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, iHighLimit); + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = matchPtr + back; + *startpos = ip + back; + } } } + } else { /* lowestMatchIndex <= matchIndex < dictLimit */ + const BYTE* const matchPtr = dictStart + (matchIndex - dictIdx); + assert(matchIndex >= dictIdx); + if ( likely(matchIndex <= prefixIdx - 4) + && (LZ4_read32(matchPtr) == pattern) ) { + int back = 0; + const BYTE* vLimit = ip + (prefixIdx - matchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + matchLength = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + if ((ip+matchLength == vLimit) && (vLimit < iHighLimit)) + matchLength += LZ4_count(ip+matchLength, prefixPtr, iHighLimit); + back = lookBackLength ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictStart) : 0; + matchLength -= back; + if (matchLength > longest) { + longest = matchLength; + *matchpos = prefixPtr - prefixIdx + matchIndex + back; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip + back; + } } } + + if (chainSwap && matchLength==longest) { /* better match => select a better chain */ + assert(lookBackLength==0); /* search forward only */ + if (matchIndex + (U32)longest <= ipIndex) { + int const kTrigger = 4; + U32 distanceToNextMatch = 1; + int const end = longest - MINMATCH + 1; + int step = 1; + int accel = 1 << kTrigger; + int pos; + for (pos = 0; pos < end; pos += step) { + U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos); + step = (accel++ >> kTrigger); + if (candidateDist > distanceToNextMatch) { + distanceToNextMatch = candidateDist; + matchChainPos = (U32)pos; + accel = 1 << kTrigger; + } } + if (distanceToNextMatch > 1) { + if (distanceToNextMatch > matchIndex) break; /* avoid overflow */ + matchIndex -= distanceToNextMatch; + continue; + } } } + + { U32 const distNextMatch = DELTANEXTU16(chainTable, matchIndex); + if (patternAnalysis && distNextMatch==1 && matchChainPos==0) { + U32 const matchCandidateIdx = matchIndex-1; + /* may be a repeated pattern */ + if (repeat == rep_untested) { + if ( ((pattern & 0xFFFF) == (pattern >> 16)) + & ((pattern & 0xFF) == (pattern >> 24)) ) { + repeat = rep_confirmed; + srcPatternLength = LZ4HC_countPattern(ip+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern); + } else { + repeat = rep_not; + } } + if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex) + && LZ4HC_protectDictEnd(prefixIdx, matchCandidateIdx) ) { + const int extDict = matchCandidateIdx < prefixIdx; + const BYTE* const matchPtr = (extDict ? dictStart - dictIdx : prefixPtr - prefixIdx) + matchCandidateIdx; + if (LZ4_read32(matchPtr) == pattern) { /* good candidate */ + const BYTE* const iLimit = extDict ? dictEnd : iHighLimit; + size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern); + if (extDict && matchPtr + forwardPatternLength == iLimit) { + U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern); + forwardPatternLength += LZ4HC_countPattern(prefixPtr, iHighLimit, rotatedPattern); + } + { const BYTE* const lowestMatchPtr = extDict ? 
dictStart : prefixPtr; + size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern); + size_t currentSegmentLength; + if (!extDict + && matchPtr - backLength == prefixPtr + && dictIdx < prefixIdx) { + U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern); + backLength += LZ4HC_reverseCountPattern(dictEnd, dictStart, rotatedPattern); + } + /* Limit backLength not go further than lowestMatchIndex */ + backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex); + assert(matchCandidateIdx - backLength >= lowestMatchIndex); + currentSegmentLength = backLength + forwardPatternLength; + /* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */ + if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */ + && (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */ + U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */ + if (LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) + matchIndex = newMatchIndex; + else { + /* Can only happen if started in the prefix */ + assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); + matchIndex = prefixIdx; + } + } else { + U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */ + if (!LZ4HC_protectDictEnd(prefixIdx, newMatchIndex)) { + assert(newMatchIndex >= prefixIdx - 3 && newMatchIndex < prefixIdx && !extDict); + matchIndex = prefixIdx; + } else { + matchIndex = newMatchIndex; + if (lookBackLength==0) { /* no back possible */ + size_t const maxML = MIN(currentSegmentLength, srcPatternLength); + if ((size_t)longest < maxML) { + assert(prefixPtr - prefixIdx + matchIndex != ip); + if ((size_t)(ip - prefixPtr) + prefixIdx - matchIndex > LZ4_DISTANCE_MAX) break; + assert(maxML < 2 GB); + longest = (int)maxML; + *matchpos = prefixPtr - prefixIdx + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ + *startpos = ip; + } + { U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex); + if (distToNextPattern > matchIndex) break; /* avoid overflow */ + matchIndex -= distToNextPattern; + } } } } } + continue; + } } + } } /* PA optimization */ + + /* follow current chain */ + matchIndex -= DELTANEXTU16(chainTable, matchIndex + matchChainPos); + + } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ + + if ( dict == usingDictCtxHc + && nbAttempts > 0 + && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { + size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->prefixStart) + dictCtx->dictLimit; + U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; + assert(dictEndOffset <= 1 GB); + matchIndex = dictMatchIndex + lowestMatchIndex - (U32)dictEndOffset; + while (ipIndex - matchIndex <= LZ4_DISTANCE_MAX && nbAttempts--) { + const BYTE* const matchPtr = dictCtx->prefixStart - dictCtx->dictLimit + dictMatchIndex; + + if (LZ4_read32(matchPtr) == pattern) { + int mlt; + int back = 0; + const BYTE* vLimit = ip + (dictEndOffset - dictMatchIndex); + if (vLimit > iHighLimit) vLimit = iHighLimit; + mlt = (int)LZ4_count(ip+MINMATCH, matchPtr+MINMATCH, vLimit) + MINMATCH; + back = lookBackLength ? 
LZ4HC_countBack(ip, matchPtr, iLowLimit, dictCtx->prefixStart) : 0; + mlt -= back; + if (mlt > longest) { + longest = mlt; + *matchpos = prefixPtr - prefixIdx + matchIndex + back; + *startpos = ip + back; + } } + + { U32 const nextOffset = DELTANEXTU16(dictCtx->chainTable, dictMatchIndex); + dictMatchIndex -= nextOffset; + matchIndex -= nextOffset; + } } } + + return longest; +} + +LZ4_FORCE_INLINE int +LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index table will be updated */ + const BYTE* const ip, const BYTE* const iLimit, + const BYTE** matchpos, + const int maxNbAttempts, + const int patternAnalysis, + const dictCtx_directive dict) +{ + const BYTE* uselessPtr = ip; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + return LZ4HC_InsertAndGetWiderMatch(hc4, ip, ip, iLimit, MINMATCH-1, matchpos, &uselessPtr, maxNbAttempts, patternAnalysis, 0 /*chainSwap*/, dict, favorCompressionRatio); +} + +/* LZ4HC_encodeSequence() : + * @return : 0 if ok, + * 1 if buffer issue detected */ +LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( + const BYTE** _ip, + BYTE** _op, + const BYTE** _anchor, + int matchLength, + const BYTE* const match, + limitedOutput_directive limit, + BYTE* oend) +{ +#define ip (*_ip) +#define op (*_op) +#define anchor (*_anchor) + + size_t length; + BYTE* const token = op++; + +#if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) + static const BYTE* start = NULL; + static U32 totalCost = 0; + U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); + U32 const ll = (U32)(ip - anchor); + U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; + U32 const mlAdd = (matchLength>=19) ? 
((matchLength-19) / 255) + 1 : 0;
+    U32 const cost = 1 + llAdd + ll + 2 + mlAdd;
+    if (start==NULL) start = anchor;  /* only works for single segment */
+    /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */
+    DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u",
+                pos,
+                (U32)(ip - anchor), matchLength, (U32)(ip-match),
+                cost, totalCost);
+    totalCost += cost;
+#endif
+
+    /* Encode Literal length */
+    length = (size_t)(ip - anchor);
+    LZ4_STATIC_ASSERT(notLimited == 0);
+    /* Check output limit */
+    if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) {
+        DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)",
+                (int)length, (int)(oend - op));
+        return 1;
+    }
+    if (length >= RUN_MASK) {
+        size_t len = length - RUN_MASK;
+        *token = (RUN_MASK << ML_BITS);
+        for(; len >= 255 ; len -= 255) *op++ = 255;
+        *op++ = (BYTE)len;
+    } else {
+        *token = (BYTE)(length << ML_BITS);
+    }
+
+    /* Copy Literals */
+    LZ4_wildCopy8(op, anchor, op + length);
+    op += length;
+
+    /* Encode Offset */
+    assert( (ip - match) <= LZ4_DISTANCE_MAX );   /* note : consider providing offset as a value, rather than as a pointer difference */
+    LZ4_writeLE16(op, (U16)(ip - match)); op += 2;
+
+    /* Encode MatchLength */
+    assert(matchLength >= MINMATCH);
+    length = (size_t)matchLength - MINMATCH;
+    if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) {
+        DEBUGLOG(6, "Not enough room to write match length");
+        return 1;   /* Check output limit */
+    }
+    if (length >= ML_MASK) {
+        *token += ML_MASK;
+        length -= ML_MASK;
+        for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; }
+        if (length >= 255) { length -= 255; *op++ = 255; }
+        *op++ = (BYTE)length;
+    } else {
+        *token += (BYTE)(length);
+    }
+
+    /* Prepare next loop */
+    ip += matchLength;
+    anchor = ip;
+
+    return 0;
+}
+#undef ip
+#undef op
+#undef anchor
+
+LZ4_FORCE_INLINE int LZ4HC_compress_hashChain (
+    LZ4HC_CCtx_internal* const ctx,
+    const char* const source,
+    char* const dest,
+    int* srcSizePtr,
+    int const maxOutputSize,
+    int maxNbAttempts,
+    const limitedOutput_directive limit,
+    const dictCtx_directive dict
+    )
+{
+    const int inputSize = *srcSizePtr;
+    const int patternAnalysis = (maxNbAttempts > 128);   /* levels 9+ */
+
+    const BYTE* ip = (const BYTE*) source;
+    const BYTE* anchor = ip;
+    const BYTE* const iend = ip + inputSize;
+    const BYTE* const mflimit = iend - MFLIMIT;
+    const BYTE* const matchlimit = (iend - LASTLITERALS);
+
+    BYTE* optr = (BYTE*) dest;
+    BYTE* op = (BYTE*) dest;
+    BYTE* oend = op + maxOutputSize;
+
+    int ml0, ml, ml2, ml3;
+    const BYTE* start0;
+    const BYTE* ref0;
+    const BYTE* ref = NULL;
+    const BYTE* start2 = NULL;
+    const BYTE* ref2 = NULL;
+    const BYTE* start3 = NULL;
+    const BYTE* ref3 = NULL;
+
+    /* init */
+    *srcSizePtr = 0;
+    if (limit == fillOutput) oend -= LASTLITERALS;   /* Hack to support the LZ4 format restriction */
+    if (inputSize < LZ4_minLength) goto _last_literals;   /* Input too small, no compression (all literals) */
+
+    /* Main Loop */
+    while (ip <= mflimit) {
+        ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, &ref, maxNbAttempts, patternAnalysis, dict);
+        if (ml < MINMATCH) { ip++; continue; }
+
+        /* saved, in case we would skip too much */
+        start0 = ip; ref0 = ref; ml0 = ml;
+
+_Search2:
+        if (ip+ml <= mflimit) {
+            ml2 = LZ4HC_InsertAndGetWiderMatch(ctx,
+                            ip + ml - 2, ip + 0, matchlimit, ml, &ref2, &start2,
+                            maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio);
+        } else {
+            ml2 = ml;
+        }
+
+        if (ml2 == ml) {  /* No better match => encode ML1 */
+            optr = op;
+            if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow;
+            continue;
+        }
+
+        if (start0 < ip) {   /* first match was skipped at least once */
+            if (start2 < ip + ml0) {   /* squeezing ML1 between ML0(original ML1) and ML2 */
+                ip = start0; ref = ref0; ml = ml0;   /* restore initial ML1 */
+        }   }
+
+        /* Here,
start0==ip */ + if ((start2 - ip) < 3) { /* First Match too small : removed */ + ml = ml2; + ip = start2; + ref =ref2; + goto _Search2; + } + +_Search3: + /* At this stage, we have : + * ml2 > ml1, and + * ip1+3 <= ip2 (usually < ip1+ml1) */ + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + int new_ml = ml; + if (new_ml > OPTIMAL_ML) new_ml = OPTIMAL_ML; + if (ip+new_ml > start2 + ml2 - MINMATCH) new_ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = new_ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } + /* Now, we have start2 = ip+new_ml, with new_ml = min(ml, OPTIMAL_ML=18) */ + + if (start2 + ml2 <= mflimit) { + ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, + start2 + ml2 - 3, start2, matchlimit, ml2, &ref3, &start3, + maxNbAttempts, patternAnalysis, 0, dict, favorCompressionRatio); + } else { + ml3 = ml2; + } + + if (ml3 == ml2) { /* No better match => encode ML1 and ML2 */ + /* ip & ref are known; Now for ml */ + if (start2 < ip+ml) ml = (int)(start2 - ip); + /* Now, encode 2 sequences */ + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + ip = start2; + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) { + ml = ml2; + ref = ref2; + goto _dest_overflow; + } + continue; + } + + if (start3 < ip+ml+3) { /* Not enough space for match 2 : remove it */ + if (start3 >= (ip+ml)) { /* can write Seq1 immediately ==> Seq2 is removed, so Seq3 becomes Seq1 */ + if (start2 < ip+ml) { + int correction = (int)(ip+ml - start2); + start2 += correction; + ref2 += correction; + ml2 -= correction; + if (ml2 < MINMATCH) { + start2 = start3; + ref2 = ref3; + ml2 = ml3; + } + } + + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + ip = start3; + ref = ref3; + ml = ml3; + + start0 = start2; + ref0 = ref2; + ml0 = ml2; + goto _Search2; + } + + start2 = start3; + ref2 = ref3; + ml2 = ml3; + goto _Search3; + } + + /* + * OK, now we have 3 ascending matches; + * let's write the first one ML1. + * ip & ref are known; Now decide ml. 
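+    * If ML1 would run into the start of ML2, ML1 is capped (keeping up to
+    * OPTIMAL_ML bytes when possible) and ML2's start/ref are advanced by the
+    * same correction, so the two encoded sequences tile without a gap.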
+ */ + if (start2 < ip+ml) { + if ((start2 - ip) < OPTIMAL_ML) { + int correction; + if (ml > OPTIMAL_ML) ml = OPTIMAL_ML; + if (ip + ml > start2 + ml2 - MINMATCH) ml = (int)(start2 - ip) + ml2 - MINMATCH; + correction = ml - (int)(start2 - ip); + if (correction > 0) { + start2 += correction; + ref2 += correction; + ml2 -= correction; + } + } else { + ml = (int)(start2 - ip); + } + } + optr = op; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, oend)) goto _dest_overflow; + + /* ML2 becomes ML1 */ + ip = start2; ref = ref2; ml = ml2; + + /* ML3 becomes ML2 */ + start2 = start3; ref2 = ref3; ml2 = ml3; + + /* let's find a new ML3 */ + goto _Search3; + } + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) return 0; + /* adapt lastRunSize to fill 'dest' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + return (int) (((char*)op)-dest); + +_dest_overflow: + if (limit == fillOutput) { + /* Assumption : ip, anchor, ml and ref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing"); + op = optr; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ml >= 0); + if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend); + } } + goto _last_literals; + } + /* compression failed */ + return 0; +} + + +static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, + const char* const source, char* dst, + int* srcSizePtr, int dstCapacity, + int const nbSearches, size_t sufficient_len, + const limitedOutput_directive limit, int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed); + + +LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + const limitedOutput_directive limit, + const dictCtx_directive dict + ) +{ + typedef enum { lz4hc, lz4opt } lz4hc_strat_e; + typedef struct { + lz4hc_strat_e strat; + int nbSearches; + U32 targetLength; + } cParams_t; + static const cParams_t 
clTable[LZ4HC_CLEVEL_MAX+1] = { + { lz4hc, 2, 16 }, /* 0, unused */ + { lz4hc, 2, 16 }, /* 1, unused */ + { lz4hc, 2, 16 }, /* 2, unused */ + { lz4hc, 4, 16 }, /* 3 */ + { lz4hc, 8, 16 }, /* 4 */ + { lz4hc, 16, 16 }, /* 5 */ + { lz4hc, 32, 16 }, /* 6 */ + { lz4hc, 64, 16 }, /* 7 */ + { lz4hc, 128, 16 }, /* 8 */ + { lz4hc, 256, 16 }, /* 9 */ + { lz4opt, 96, 64 }, /*10==LZ4HC_CLEVEL_OPT_MIN*/ + { lz4opt, 512,128 }, /*11 */ + { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ + }; + + DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + ctx, src, *srcSizePtr, limit); + + if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ + if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ + + ctx->end += *srcSizePtr; + if (cLevel < 1) cLevel = LZ4HC_CLEVEL_DEFAULT; /* note : convention is different from lz4frame, maybe something to review */ + cLevel = MIN(LZ4HC_CLEVEL_MAX, cLevel); + { cParams_t const cParam = clTable[cLevel]; + HCfavor_e const favor = ctx->favorDecSpeed ? favorDecompressionSpeed : favorCompressionRatio; + int result; + + if (cParam.strat == lz4hc) { + result = LZ4HC_compress_hashChain(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, limit, dict); + } else { + assert(cParam.strat == lz4opt); + result = LZ4HC_compress_optimal(ctx, + src, dst, srcSizePtr, dstCapacity, + cParam.nbSearches, cParam.targetLength, limit, + cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */ + dict, favor); + } + if (result <= 0) ctx->dirty = 1; + return result; + } +} + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock); + +static int +LZ4HC_compress_generic_noDictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + assert(ctx->dictCtx == NULL); + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, noDictCtx); +} + +static int +LZ4HC_compress_generic_dictCtx ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + const size_t position = (size_t)(ctx->end - ctx->prefixStart) + (ctx->dictLimit - ctx->lowLimit); + assert(ctx->dictCtx != NULL); + if (position >= 64 KB) { + ctx->dictCtx = NULL; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else if (position == 0 && *srcSizePtr > 4 KB) { + LZ4_memcpy(ctx, ctx->dictCtx, sizeof(LZ4HC_CCtx_internal)); + LZ4HC_setExternalDict(ctx, (const BYTE *)src); + ctx->compressionLevel = (short)cLevel; + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_internal(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit, usingDictCtxHc); + } +} + +static int +LZ4HC_compress_generic ( + LZ4HC_CCtx_internal* const ctx, + const char* const src, + char* const dst, + int* const srcSizePtr, + int const dstCapacity, + int cLevel, + limitedOutput_directive limit + ) +{ + if (ctx->dictCtx == NULL) { + return LZ4HC_compress_generic_noDictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } else { + return LZ4HC_compress_generic_dictCtx(ctx, src, dst, srcSizePtr, dstCapacity, cLevel, limit); + } +} + + +int LZ4_sizeofStateHC(void) { return (int)sizeof(LZ4_streamHC_t); } + +static 
size_t LZ4_streamHC_t_alignment(void)
+{
+#if LZ4_ALIGN_TEST
+    typedef struct { char c; LZ4_streamHC_t t; } t_a;
+    return sizeof(t_a) - sizeof(LZ4_streamHC_t);
+#else
+    return 1;  /* effectively disabled */
+#endif
+}
+
+/* state is presumed correctly initialized,
+ * in which case its size and alignment have already been validated */
+int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse;
+    if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0;
+    LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel);
+    LZ4HC_init_internal (ctx, (const BYTE*)src);
+    if (dstCapacity < LZ4_compressBound(srcSize))
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, limitedOutput);
+    else
+        return LZ4HC_compress_generic (ctx, src, dst, &srcSize, dstCapacity, compressionLevel, notLimited);
+}
+
+int LZ4_compress_HC_extStateHC (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+    if (ctx==NULL) return 0;   /* init failure */
+    return LZ4_compress_HC_extStateHC_fastReset(state, src, dst, srcSize, dstCapacity, compressionLevel);
+}
+
+int LZ4_compress_HC(const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel)
+{
+    int cSize;
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+    LZ4_streamHC_t* const statePtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t));
+    if (statePtr==NULL) return 0;
+#else
+    LZ4_streamHC_t state;
+    LZ4_streamHC_t* const statePtr = &state;
+#endif
+    cSize = LZ4_compress_HC_extStateHC(statePtr, src, dst, srcSize, dstCapacity, compressionLevel);
+#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1
+    FREEMEM(statePtr);
+#endif
+    return cSize;
+}
+
+/* state is presumed sized correctly (>= sizeof(LZ4_streamHC_t)) */
+int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* sourceSizePtr, int targetDestSize, int cLevel)
+{
+    LZ4_streamHC_t* const ctx = LZ4_initStreamHC(state, sizeof(*ctx));
+    if (ctx==NULL) return 0;   /* init failure */
+    LZ4HC_init_internal(&ctx->internal_donotuse, (const BYTE*) source);
+    LZ4_setCompressionLevel(ctx, cLevel);
+    return LZ4HC_compress_generic(&ctx->internal_donotuse, source, dest, sourceSizePtr, targetDestSize, cLevel, fillOutput);
+}
+
+
+
+/**************************************
+* Streaming Functions
+**************************************/
+/* allocation */
+#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
+LZ4_streamHC_t* LZ4_createStreamHC(void)
+{
+    LZ4_streamHC_t* const state =
+        (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t));
+    if (state == NULL) return NULL;
+    LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT);
+    return state;
+}
+
+int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr)
+{
+    DEBUGLOG(4, "LZ4_freeStreamHC(%p)", LZ4_streamHCPtr);
+    if (!LZ4_streamHCPtr) return 0;  /* support free on NULL */
+    FREEMEM(LZ4_streamHCPtr);
+    return 0;
+}
+#endif
+
+
+LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size)
+{
+    LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer;
+    DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size);
+    /* check conditions */
+    if (buffer == NULL) return NULL;
+    if (size < sizeof(LZ4_streamHC_t)) return NULL;
+    if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL;
+    /* init */
+    {
LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse); + MEM_INIT(hcstate, 0, sizeof(*hcstate)); } + LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); + return LZ4_streamHCPtr; +} + +/* just a stub */ +void LZ4_resetStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_resetStreamHC_fast (LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + DEBUGLOG(4, "LZ4_resetStreamHC_fast(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (LZ4_streamHCPtr->internal_donotuse.dirty) { + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + } else { + /* preserve end - prefixStart : can trigger clearTable's threshold */ + if (LZ4_streamHCPtr->internal_donotuse.end != NULL) { + LZ4_streamHCPtr->internal_donotuse.end -= (uptrval)LZ4_streamHCPtr->internal_donotuse.prefixStart; + } else { + assert(LZ4_streamHCPtr->internal_donotuse.prefixStart == NULL); + } + LZ4_streamHCPtr->internal_donotuse.prefixStart = NULL; + LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; + } + LZ4_setCompressionLevel(LZ4_streamHCPtr, compressionLevel); +} + +void LZ4_setCompressionLevel(LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel) +{ + DEBUGLOG(5, "LZ4_setCompressionLevel(%p, %d)", LZ4_streamHCPtr, compressionLevel); + if (compressionLevel < 1) compressionLevel = LZ4HC_CLEVEL_DEFAULT; + if (compressionLevel > LZ4HC_CLEVEL_MAX) compressionLevel = LZ4HC_CLEVEL_MAX; + LZ4_streamHCPtr->internal_donotuse.compressionLevel = (short)compressionLevel; +} + +void LZ4_favorDecompressionSpeed(LZ4_streamHC_t* LZ4_streamHCPtr, int favor) +{ + LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = (favor!=0); +} + +/* LZ4_loadDictHC() : + * LZ4_streamHCPtr is presumed properly initialized */ +int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* dictionary, int dictSize) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize); + assert(LZ4_streamHCPtr != NULL); + if (dictSize > 64 KB) { + dictionary += (size_t)dictSize - 64 KB; + dictSize = 64 KB; + } + /* need a full initialization, there are bad side-effects when using resetFast() */ + { int const cLevel = ctxPtr->compressionLevel; + LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); + LZ4_setCompressionLevel(LZ4_streamHCPtr, cLevel); + } + LZ4HC_init_internal (ctxPtr, (const BYTE*)dictionary); + ctxPtr->end = (const BYTE*)dictionary + dictSize; + if (dictSize >= 4) LZ4HC_Insert (ctxPtr, ctxPtr->end-3); + return dictSize; +} + +void LZ4_attach_HC_dictionary(LZ4_streamHC_t *working_stream, const LZ4_streamHC_t *dictionary_stream) { + working_stream->internal_donotuse.dictCtx = dictionary_stream != NULL ? 
&(dictionary_stream->internal_donotuse) : NULL; +} + +/* compression */ + +static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBlock) +{ + DEBUGLOG(4, "LZ4HC_setExternalDict(%p, %p)", ctxPtr, newBlock); + if (ctxPtr->end >= ctxPtr->prefixStart + 4) + LZ4HC_Insert (ctxPtr, ctxPtr->end-3); /* Referencing remaining dictionary content */ + + /* Only one memory segment for extDict, so any previous extDict is lost at this stage */ + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + ctxPtr->dictLimit += (U32)(ctxPtr->end - ctxPtr->prefixStart); + ctxPtr->prefixStart = newBlock; + ctxPtr->end = newBlock; + ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */ + + /* cannot reference an extDict and a dictCtx at the same time */ + ctxPtr->dictCtx = NULL; +} + +static int +LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, + const char* src, char* dst, + int* srcSizePtr, int dstCapacity, + limitedOutput_directive limit) +{ + LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; + DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + LZ4_streamHCPtr, src, *srcSizePtr, limit); + assert(ctxPtr != NULL); + /* auto-init if forgotten */ + if (ctxPtr->prefixStart == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src); + + /* Check overflow */ + if ((size_t)(ctxPtr->end - ctxPtr->prefixStart) + ctxPtr->dictLimit > 2 GB) { + size_t dictSize = (size_t)(ctxPtr->end - ctxPtr->prefixStart); + if (dictSize > 64 KB) dictSize = 64 KB; + LZ4_loadDictHC(LZ4_streamHCPtr, (const char*)(ctxPtr->end) - dictSize, (int)dictSize); + } + + /* Check if blocks follow each other */ + if ((const BYTE*)src != ctxPtr->end) + LZ4HC_setExternalDict(ctxPtr, (const BYTE*)src); + + /* Check overlapping input/dictionary space */ + { const BYTE* sourceEnd = (const BYTE*) src + *srcSizePtr; + const BYTE* const dictBegin = ctxPtr->dictStart; + const BYTE* const dictEnd = ctxPtr->dictStart + (ctxPtr->dictLimit - ctxPtr->lowLimit); + if ((sourceEnd > dictBegin) && ((const BYTE*)src < dictEnd)) { + if (sourceEnd > dictEnd) sourceEnd = dictEnd; + ctxPtr->lowLimit += (U32)(sourceEnd - ctxPtr->dictStart); + ctxPtr->dictStart += (U32)(sourceEnd - ctxPtr->dictStart); + if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) { + ctxPtr->lowLimit = ctxPtr->dictLimit; + ctxPtr->dictStart = ctxPtr->prefixStart; + } } } + + return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); +} + +int LZ4_compress_HC_continue (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int srcSize, int dstCapacity) +{ + if (dstCapacity < LZ4_compressBound(srcSize)) + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, limitedOutput); + else + return LZ4_compressHC_continue_generic (LZ4_streamHCPtr, src, dst, &srcSize, dstCapacity, notLimited); +} + +int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const char* src, char* dst, int* srcSizePtr, int targetDestSize) +{ + return LZ4_compressHC_continue_generic(LZ4_streamHCPtr, src, dst, srcSizePtr, targetDestSize, fillOutput); +} + + + +/* LZ4_saveDictHC : + * save history content + * into a user-provided buffer + * which is then used to continue compression + */ +int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) +{ + LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; + int const prefixSize = 
(int)(streamPtr->end - streamPtr->prefixStart); + DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); + assert(prefixSize >= 0); + if (dictSize > 64 KB) dictSize = 64 KB; + if (dictSize < 4) dictSize = 0; + if (dictSize > prefixSize) dictSize = prefixSize; + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + LZ4_memmove(safeBuffer, streamPtr->end - dictSize, dictSize); + { U32 const endIndex = (U32)(streamPtr->end - streamPtr->prefixStart) + streamPtr->dictLimit; + streamPtr->end = (const BYTE*)safeBuffer + dictSize; + streamPtr->prefixStart = streamPtr->end - dictSize; + streamPtr->dictLimit = endIndex - (U32)dictSize; + streamPtr->lowLimit = endIndex - (U32)dictSize; + streamPtr->dictStart = streamPtr->prefixStart; + if (streamPtr->nextToUpdate < streamPtr->dictLimit) + streamPtr->nextToUpdate = streamPtr->dictLimit; + } + return dictSize; +} + + +/*************************************************** +* Deprecated Functions +***************************************************/ + +/* These functions currently generate deprecation warnings */ + +/* Wrappers for deprecated compression functions */ +int LZ4_compressHC(const char* src, char* dst, int srcSize) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2(const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC (src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput(const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC(src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_withStateHC (void* state, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, LZ4_compressBound(srcSize), 0); } +int LZ4_compressHC_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_extStateHC (state, src, dst, srcSize, maxDstSize, 0); } +int LZ4_compressHC2_withStateHC (void* state, const char* src, char* dst, int srcSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, LZ4_compressBound(srcSize), cLevel); } +int LZ4_compressHC2_limitedOutput_withStateHC (void* state, const char* src, char* dst, int srcSize, int maxDstSize, int cLevel) { return LZ4_compress_HC_extStateHC(state, src, dst, srcSize, maxDstSize, cLevel); } +int LZ4_compressHC_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, LZ4_compressBound(srcSize)); } +int LZ4_compressHC_limitedOutput_continue (LZ4_streamHC_t* ctx, const char* src, char* dst, int srcSize, int maxDstSize) { return LZ4_compress_HC_continue (ctx, src, dst, srcSize, maxDstSize); } + + +/* Deprecated streaming functions */ +int LZ4_sizeofStreamStateHC(void) { return sizeof(LZ4_streamHC_t); } + +/* state is presumed correctly sized, aka >= sizeof(LZ4_streamHC_t) + * @return : 0 on success, !=0 if error */ +int LZ4_resetStreamStateHC(void* state, char* inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_initStreamHC(state, sizeof(*hc4)); + if (hc4 == NULL) return 1; /* init failed */ + LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return 0; +} + +#if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION) +void* LZ4_createHC (const char* 
inputBuffer) +{ + LZ4_streamHC_t* const hc4 = LZ4_createStreamHC(); + if (hc4 == NULL) return NULL; /* not enough memory */ + LZ4HC_init_internal (&hc4->internal_donotuse, (const BYTE*)inputBuffer); + return hc4; +} + +int LZ4_freeHC (void* LZ4HC_Data) +{ + if (!LZ4HC_Data) return 0; /* support free on NULL */ + FREEMEM(LZ4HC_Data); + return 0; +} +#endif + +int LZ4_compressHC2_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, 0, cLevel, notLimited); +} + +int LZ4_compressHC2_limitedOutput_continue (void* LZ4HC_Data, const char* src, char* dst, int srcSize, int dstCapacity, int cLevel) +{ + return LZ4HC_compress_generic (&((LZ4_streamHC_t*)LZ4HC_Data)->internal_donotuse, src, dst, &srcSize, dstCapacity, cLevel, limitedOutput); +} + +char* LZ4_slideInputBufferHC(void* LZ4HC_Data) +{ + LZ4_streamHC_t* const ctx = (LZ4_streamHC_t*)LZ4HC_Data; + const BYTE* bufferStart = ctx->internal_donotuse.prefixStart - ctx->internal_donotuse.dictLimit + ctx->internal_donotuse.lowLimit; + LZ4_resetStreamHC_fast(ctx, ctx->internal_donotuse.compressionLevel); + /* avoid const char * -> char * conversion warning :( */ + return (char*)(uptrval)bufferStart; +} + + +/* ================================================ + * LZ4 Optimal parser (levels [LZ4HC_CLEVEL_OPT_MIN - LZ4HC_CLEVEL_MAX]) + * ===============================================*/ +typedef struct { + int price; + int off; + int mlen; + int litlen; +} LZ4HC_optimal_t; + +/* price in bytes */ +LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) +{ + int price = litlen; + assert(litlen >= 0); + if (litlen >= (int)RUN_MASK) + price += 1 + ((litlen-(int)RUN_MASK) / 255); + return price; +} + + +/* requires mlen >= MINMATCH */ +LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) +{ + int price = 1 + 2 ; /* token + 16-bit offset */ + assert(litlen >= 0); + assert(mlen >= MINMATCH); + + price += LZ4HC_literalsPrice(litlen); + + if (mlen >= (int)(ML_MASK+MINMATCH)) + price += 1 + ((mlen-(int)(ML_MASK+MINMATCH)) / 255); + + return price; +} + + +typedef struct { + int off; + int len; +} LZ4HC_match_t; + +LZ4_FORCE_INLINE LZ4HC_match_t +LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, + const BYTE* ip, const BYTE* const iHighLimit, + int minLen, int nbSearches, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + LZ4HC_match_t match = { 0 , 0 }; + const BYTE* matchPtr = NULL; + /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), + * but this won't be the case here, as we define iLowLimit==ip, + * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ + int matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches, 1 /*patternAnalysis*/, 1 /*chainSwap*/, dict, favorDecSpeed); + if (matchLength <= minLen) return match; + if (favorDecSpeed) { + if ((matchLength>18) & (matchLength<=36)) matchLength=18; /* favor shortcut */ + } + match.len = matchLength; + match.off = (int)(ip-matchPtr); + return match; +} + + +static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, + const char* const source, + char* dst, + int* srcSizePtr, + int dstCapacity, + int const nbSearches, + size_t sufficient_len, + const limitedOutput_directive limit, + int const fullUpdate, + const dictCtx_directive dict, + const HCfavor_e favorDecSpeed) +{ + int retval = 0; +#define TRAILING_LITERALS 3 +#if 
defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); +#else + LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ +#endif + + const BYTE* ip = (const BYTE*) source; + const BYTE* anchor = ip; + const BYTE* const iend = ip + *srcSizePtr; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + BYTE* op = (BYTE*) dst; + BYTE* opSaved = (BYTE*) dst; + BYTE* oend = op + dstCapacity; + int ovml = MINMATCH; /* overflow - last sequence */ + const BYTE* ovref = NULL; + + /* init */ +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + if (opt == NULL) goto _return_label; +#endif + DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); + *srcSizePtr = 0; + if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ + if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; + + /* Main Loop */ + while (ip <= mflimit) { + int const llen = (int)(ip - anchor); + int best_mlen, best_off; + int cur, last_match_pos = 0; + + LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + if (firstMatch.len==0) { ip++; continue; } + + if ((size_t)firstMatch.len > sufficient_len) { + /* good enough solution : immediate encoding */ + int const firstML = firstMatch.len; + const BYTE* const matchPos = ip - firstMatch.off; + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */ + ovml = firstML; + ovref = matchPos; + goto _dest_overflow; + } + continue; + } + + /* set prices for first positions (literals) */ + { int rPos; + for (rPos = 0 ; rPos < MINMATCH ; rPos++) { + int const cost = LZ4HC_literalsPrice(llen + rPos); + opt[rPos].mlen = 1; + opt[rPos].off = 0; + opt[rPos].litlen = llen + rPos; + opt[rPos].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + rPos, cost, opt[rPos].litlen); + } } + /* set prices using initial match */ + { int mlen = MINMATCH; + int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ + int const offset = firstMatch.off; + assert(matchML < LZ4_OPT_NUM); + for ( ; mlen <= matchML ; mlen++) { + int const cost = LZ4HC_sequencePrice(llen, mlen); + opt[mlen].mlen = mlen; + opt[mlen].off = offset; + opt[mlen].litlen = llen; + opt[mlen].price = cost; + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup", + mlen, cost, mlen); + } } + last_match_pos = firstMatch.len; + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup", + last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + + /* check further positions */ + for (cur = 1; cur < last_match_pos; cur++) { + const BYTE* const curPtr = ip + cur; + LZ4HC_match_t newMatch; + + if (curPtr > mflimit) break; + DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u", + cur, opt[cur].price, opt[cur+1].price, cur+1); + if (fullUpdate) { + /* not useful to search here if next position has same (or lower) cost */ + if ( (opt[cur+1].price <= 
opt[cur].price) + /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ + && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) + continue; + } else { + /* not useful to search here if next position has same (or lower) cost */ + if (opt[cur+1].price <= opt[cur].price) continue; + } + + DEBUGLOG(7, "search at rPos:%u", cur); + if (fullUpdate) + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches, dict, favorDecSpeed); + else + /* only test matches of minimum length; slightly faster, but misses a few bytes */ + newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches, dict, favorDecSpeed); + if (!newMatch.len) continue; + + if ( ((size_t)newMatch.len > sufficient_len) + || (newMatch.len + cur >= LZ4_OPT_NUM) ) { + /* immediate encoding */ + best_mlen = newMatch.len; + best_off = newMatch.off; + last_match_pos = cur + 1; + goto encode; + } + + /* before match : set price with literals at beginning */ + { int const baseLitlen = opt[cur].litlen; + int litlen; + for (litlen = 1; litlen < MINMATCH; litlen++) { + int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); + int const pos = cur + litlen; + if (price < opt[pos].price) { + opt[pos].mlen = 1; /* literal */ + opt[pos].off = 0; + opt[pos].litlen = baseLitlen+litlen; + opt[pos].price = price; + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", + pos, price, opt[pos].litlen); + } } } + + /* set prices using match at position = cur */ + { int const matchML = newMatch.len; + int ml = MINMATCH; + + assert(cur + newMatch.len < LZ4_OPT_NUM); + for ( ; ml <= matchML ; ml++) { + int const pos = cur + ml; + int const offset = newMatch.off; + int price; + int ll; + DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)", + pos, last_match_pos); + if (opt[cur].mlen == 1) { + ll = opt[cur].litlen; + price = ((cur > ll) ? 
opt[cur - ll].price : 0) + + LZ4HC_sequencePrice(ll, ml); + } else { + ll = 0; + price = opt[cur].price + LZ4HC_sequencePrice(0, ml); + } + + assert((U32)favorDecSpeed <= 1); + if (pos > last_match_pos+TRAILING_LITERALS + || price <= opt[pos].price - (int)favorDecSpeed) { + DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)", + pos, price, ml); + assert(pos < LZ4_OPT_NUM); + if ( (ml == matchML) /* last pos of last match */ + && (last_match_pos < pos) ) + last_match_pos = pos; + opt[pos].mlen = ml; + opt[pos].off = offset; + opt[pos].litlen = ll; + opt[pos].price = price; + } } } + /* complete following positions with literals */ + { int addLit; + for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { + opt[last_match_pos+addLit].mlen = 1; /* literal */ + opt[last_match_pos+addLit].off = 0; + opt[last_match_pos+addLit].litlen = addLit; + opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); + DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)", last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); + } } + } /* for (cur = 1; cur <= last_match_pos; cur++) */ + + assert(last_match_pos < LZ4_OPT_NUM + TRAILING_LITERALS); + best_mlen = opt[last_match_pos].mlen; + best_off = opt[last_match_pos].off; + cur = last_match_pos - best_mlen; + +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ + assert(cur < LZ4_OPT_NUM); + assert(last_match_pos >= 1); /* == 1 when only one candidate */ + DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); + { int candidate_pos = cur; + int selected_matchLength = best_mlen; + int selected_offset = best_off; + while (1) { /* from end to beginning */ + int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ + int const next_offset = opt[candidate_pos].off; + DEBUGLOG(7, "pos %i: sequence length %i", candidate_pos, selected_matchLength); + opt[candidate_pos].mlen = selected_matchLength; + opt[candidate_pos].off = selected_offset; + selected_matchLength = next_matchLength; + selected_offset = next_offset; + if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ + assert(next_matchLength > 0); /* can be 1, means literal */ + candidate_pos -= next_matchLength; + } } + + /* encode all recorded sequences in order */ + { int rPos = 0; /* relative position (to ip) */ + while (rPos < last_match_pos) { + int const ml = opt[rPos].mlen; + int const offset = opt[rPos].off; + if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ + rPos += ml; + assert(ml >= MINMATCH); + assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); + opSaved = op; + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */ + ovml = ml; + ovref = ip - offset; + goto _dest_overflow; + } } } + } /* while (ip <= mflimit) */ + +_last_literals: + /* Encode Last Literals */ + { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; + if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ + if (limit && (op + totalSize > oend)) { + if (limit == limitedOutput) { /* Check output limit */ + retval = 0; + goto _return_label; + } + /* adapt lastRunSize to fill 'dst' */ + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; + } 
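+        /* note : the adaptation above shrinks the run by an estimate of its
+         * own header cost : roughly one extra length byte is needed per 255
+         * literals beyond RUN_MASK, so llAdd bytes are reclaimed from
+         * lastRunSize to keep token + length bytes + literals within the
+         * remaining output room */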
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ + + if (lastRunSize >= RUN_MASK) { + size_t accumulator = lastRunSize - RUN_MASK; + *op++ = (RUN_MASK << ML_BITS); + for(; accumulator >= 255 ; accumulator -= 255) *op++ = 255; + *op++ = (BYTE) accumulator; + } else { + *op++ = (BYTE)(lastRunSize << ML_BITS); + } + LZ4_memcpy(op, anchor, lastRunSize); + op += lastRunSize; + } + + /* End */ + *srcSizePtr = (int) (((const char*)ip) - source); + retval = (int) ((char*)op-dst); + goto _return_label; + +_dest_overflow: +if (limit == fillOutput) { + /* Assumption : ip, anchor, ovml and ovref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved)); + op = opSaved; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ovml >= 0); + if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { + DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); + DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend); + DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); + } } + goto _last_literals; +} +_return_label: +#if defined(LZ4HC_HEAPMODE) && LZ4HC_HEAPMODE==1 + FREEMEM(opt); +#endif + return retval; +} diff --git a/c-blosc/internal-complibs/lz4-1.9.4/lz4hc.h b/c-blosc/internal-complibs/lz4-1.9.4/lz4hc.h new file mode 100644 index 0000000..e937acf --- /dev/null +++ b/c-blosc/internal-complibs/lz4-1.9.4/lz4hc.h @@ -0,0 +1,413 @@ +/* + LZ4 HC - High Compression Mode of LZ4 + Header File + Copyright (C) 2011-2020, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+  You can contact the author at :
+  - LZ4 source repository : https://github.com/lz4/lz4
+  - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+#ifndef LZ4_HC_H_19834876238432
+#define LZ4_HC_H_19834876238432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* --- Dependency --- */
+/* note : lz4hc requires lz4.h/lz4.c for compilation */
+#include "lz4.h"   /* stddef, LZ4LIB_API, LZ4_DEPRECATED */
+
+
+/* --- Useful constants --- */
+#define LZ4HC_CLEVEL_MIN         3
+#define LZ4HC_CLEVEL_DEFAULT     9
+#define LZ4HC_CLEVEL_OPT_MIN    10
+#define LZ4HC_CLEVEL_MAX        12
+
+
+/*-************************************
+ * Block Compression
+ **************************************/
+/*! LZ4_compress_HC() :
+ * Compress data from `src` into `dst`, using the powerful but slower "HC" algorithm.
+ * `dst` must be already allocated.
+ * Compression is guaranteed to succeed if `dstCapacity >= LZ4_compressBound(srcSize)` (see "lz4.h")
+ * Max supported `srcSize` value is LZ4_MAX_INPUT_SIZE (see "lz4.h")
+ * `compressionLevel` : any value between 1 and LZ4HC_CLEVEL_MAX will work.
+ *                      Values > LZ4HC_CLEVEL_MAX behave the same as LZ4HC_CLEVEL_MAX.
+ * @return : the number of bytes written into 'dst'
+ *           or 0 if compression fails.
+ */
+LZ4LIB_API int LZ4_compress_HC (const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel);
+
+
+/* Note :
+ * Decompression functions are provided within "lz4.h" (BSD license)
+ */
+
+
+/*! LZ4_compress_HC_extStateHC() :
+ * Same as LZ4_compress_HC(), but using an externally allocated memory segment for `state`.
+ * `state` size is provided by LZ4_sizeofStateHC().
+ * Memory segment must be aligned on 8-byte boundaries (which a normal malloc() should do properly).
+ */
+LZ4LIB_API int LZ4_sizeofStateHC(void);
+LZ4LIB_API int LZ4_compress_HC_extStateHC(void* stateHC, const char* src, char* dst, int srcSize, int maxDstSize, int compressionLevel);
+
+
+/*! LZ4_compress_HC_destSize() : v1.9.0+
+ * Will compress as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr is updated to indicate how many bytes were read from `src`
+ */
+LZ4LIB_API int LZ4_compress_HC_destSize(void* stateHC,
+                                  const char* src, char* dst,
+                                  int* srcSizePtr, int targetDstSize,
+                                  int compressionLevel);
+
+
+/*-************************************
+ * Streaming Compression
+ * Bufferless synchronous API
+ **************************************/
+ typedef union LZ4_streamHC_u LZ4_streamHC_t;   /* incomplete type (defined later) */
+
+/*! LZ4_createStreamHC() and LZ4_freeStreamHC() :
+ * These functions create and release memory for LZ4 HC streaming state.
+ * Newly created states are automatically initialized.
+ * The same state can be reused multiple times consecutively,
+ * starting with LZ4_resetStreamHC_fast() to start a new stream of blocks.
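+ *
+ * A minimal streaming sketch (buffer names and sizes are hypothetical,
+ * error handling elided) :
+ *
+ *     LZ4_streamHC_t* const s = LZ4_createStreamHC();
+ *     LZ4_resetStreamHC_fast(s, LZ4HC_CLEVEL_DEFAULT);
+ *     int const cSize = LZ4_compress_HC_continue(s, srcBlock, dstBuf,
+ *                                                srcBlockSize, dstBufSize);
+ *     LZ4_freeStreamHC(s);
+ *
+ * For guaranteed success, dstBufSize should be >= LZ4_compressBound(srcBlockSize).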
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_createStreamHC(void);
+LZ4LIB_API int             LZ4_freeStreamHC (LZ4_streamHC_t* streamHCPtr);
+
+/*
+  These functions compress data in successive blocks of any size,
+  using previous blocks as dictionary, to improve compression ratio.
+  One key assumption is that previous blocks (up to 64 KB) remain read-accessible while compressing next blocks.
+  There is an exception for ring buffers, which can be smaller than 64 KB.
+  Ring-buffer scenario is automatically detected and handled within LZ4_compress_HC_continue().
+
+  Before starting compression, state must be allocated and properly initialized.
+  LZ4_createStreamHC() does both, though compression level is set to LZ4HC_CLEVEL_DEFAULT.
+
+  Selecting the compression level can be done with LZ4_resetStreamHC_fast() (starts a new stream)
+  or LZ4_setCompressionLevel() (anytime, between blocks in the same stream) (experimental).
+  LZ4_resetStreamHC_fast() only works on states which have been properly initialized at least once,
+  which is automatically the case when state is created using LZ4_createStreamHC().
+
+  After reset, a first "fictional block" can be designated as initial dictionary,
+  using LZ4_loadDictHC() (Optional).
+
+  Invoke LZ4_compress_HC_continue() to compress each successive block.
+  The number of blocks is unlimited.
+  Previous input blocks, including initial dictionary when present,
+  must remain accessible and unmodified during compression.
+
+  It's allowed to update compression level anytime between blocks,
+  using LZ4_setCompressionLevel() (experimental).
+
+  'dst' buffer should be sized to handle worst case scenarios
+  (see LZ4_compressBound(), it ensures compression success).
+  In case of failure, the API does not guarantee recovery,
+  so the state _must_ be reset.
+  To ensure compression success
+  whenever `dst` buffer size cannot be made >= LZ4_compressBound(),
+  consider using LZ4_compress_HC_continue_destSize().
+
+  Whenever previous input blocks can't be preserved unmodified in-place during compression of next blocks,
+  it's possible to copy the last blocks into a more stable memory space, using LZ4_saveDictHC().
+  Return value of LZ4_saveDictHC() is the size of dictionary effectively saved into 'safeBuffer' (<= 64 KB)
+
+  After completing a streaming compression,
+  it's possible to start a new stream of blocks, using the same LZ4_streamHC_t state,
+  just by resetting it, using LZ4_resetStreamHC_fast().
+*/
+
+LZ4LIB_API void LZ4_resetStreamHC_fast(LZ4_streamHC_t* streamHCPtr, int compressionLevel);   /* v1.9.0+ */
+LZ4LIB_API int  LZ4_loadDictHC (LZ4_streamHC_t* streamHCPtr, const char* dictionary, int dictSize);
+
+LZ4LIB_API int LZ4_compress_HC_continue (LZ4_streamHC_t* streamHCPtr,
+                                         const char* src, char* dst,
+                                         int srcSize, int maxDstSize);
+
+/*! LZ4_compress_HC_continue_destSize() : v1.9.0+
+ * Similar to LZ4_compress_HC_continue(),
+ * but will read as much data as possible from `src`
+ * to fit into `targetDstSize` budget.
+ * Result is provided in 2 parts :
+ * @return : the number of bytes written into 'dst' (necessarily <= targetDstSize)
+ *           or 0 if compression fails.
+ * `srcSizePtr` : on success, *srcSizePtr will be updated to indicate how many bytes were read from `src`.
+ *           Note that this function may not consume the entire input.
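+ *
+ * A minimal sketch (names hypothetical) : fit as much of `src` as possible
+ * into a fixed 512-byte output budget :
+ *
+ *     int srcSize = (int)srcLen;
+ *     int const cSize = LZ4_compress_HC_continue_destSize(stream, src, dst,
+ *                                                         &srcSize, 512);
+ *
+ * On return, `srcSize` holds the number of source bytes actually consumed.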
+ */
+LZ4LIB_API int LZ4_compress_HC_continue_destSize(LZ4_streamHC_t* LZ4_streamHCPtr,
+                                           const char* src, char* dst,
+                                           int* srcSizePtr, int targetDstSize);
+
+LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, int maxDictSize);
+
+
+
+/*^**********************************************
+ * !!!!!!   STATIC LINKING ONLY   !!!!!!
+ ***********************************************/
+
+/*-******************************************************************
+ * PRIVATE DEFINITIONS :
+ * Do not use these definitions directly.
+ * They are merely exposed to allow static allocation of `LZ4_streamHC_t`.
+ * Declare an `LZ4_streamHC_t` directly, rather than any type below.
+ * Even then, only do so in the context of static linking, as definitions may change between versions.
+ ********************************************************************/
+
+#define LZ4HC_DICTIONARY_LOGSIZE 16
+#define LZ4HC_MAXD (1<<LZ4HC_DICTIONARY_LOGSIZE)
+#define LZ4HC_MAXD_MASK (LZ4HC_MAXD - 1)
+
+#define LZ4HC_HASH_LOG 15
+#define LZ4HC_HASHTABLESIZE (1 << LZ4HC_HASH_LOG)
+#define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1)
+
+typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal;
+struct LZ4HC_CCtx_internal
+{
+    LZ4_u32   hashTable[LZ4HC_HASHTABLESIZE];
+    LZ4_u16   chainTable[LZ4HC_MAXD];
+    const LZ4_byte* end;         /* next block here to continue on current prefix */
+    const LZ4_byte* prefixStart; /* Indexes relative to this position */
+    const LZ4_byte* dictStart;   /* alternate reference for extDict */
+    LZ4_u32   dictLimit;         /* below that point, need extDict */
+    LZ4_u32   lowLimit;          /* below that point, no more dict */
+    LZ4_u32   nextToUpdate;      /* index from which to continue dictionary update */
+    short     compressionLevel;
+    LZ4_i8    favorDecSpeed;     /* favor decompression speed if this flag set,
+                                    otherwise, favor compression ratio */
+    LZ4_i8    dirty;             /* stream has to be fully reset if this flag is set */
+    const LZ4HC_CCtx_internal* dictCtx;
+};
+
+#define LZ4_STREAMHC_MINSIZE  262200  /* static size, for inter-version compatibility */
+union LZ4_streamHC_u {
+    char minStateSize[LZ4_STREAMHC_MINSIZE];
+    LZ4HC_CCtx_internal internal_donotuse;
+};   /* previously typedef'd to LZ4_streamHC_t */
+
+/*! LZ4_initStreamHC() : v1.9.0+
+ *  Required before first use of a statically allocated LZ4_streamHC_t.
+ *  Before v1.9.0 : use LZ4_resetStreamHC() instead
+ */
+LZ4LIB_API LZ4_streamHC_t* LZ4_initStreamHC(void* buffer, size_t size);
+
+#endif   /* LZ4_HC_H_19834876238432 */
+
+
+/*-**************************************************
+ * !!!!!     STATIC LINKING ONLY     !!!!!
+ * Following definitions are considered experimental.
+ * They should not be linked from DLL,
+ * as there is no guarantee of API stability yet.
+ * Prototypes will be promoted to "stable" status
+ * after successful usage in real-life scenarios.
+ ***************************************************/
+#ifdef LZ4_HC_STATIC_LINKING_ONLY   /* protection macro */
+#ifndef LZ4_HC_SLO_098092834
+#define LZ4_HC_SLO_098092834
+
+#define LZ4_STATIC_LINKING_ONLY   /* LZ4LIB_STATIC_API */
+#include "lz4.h"
+
+/*! LZ4_setCompressionLevel() : v1.8.0+ (experimental)
+ *  It's possible to change compression level
+ *  between successive invocations of LZ4_compress_HC_continue*()
+ *  for dynamic adaptation.
+ */
+LZ4LIB_STATIC_API void LZ4_setCompressionLevel(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_favorDecompressionSpeed() : v1.8.2+ (experimental)
+ *  Opt. Parser will favor decompression speed over compression ratio.
+ *  Only applicable to levels >= LZ4HC_CLEVEL_OPT_MIN.
+ */
+LZ4LIB_STATIC_API void LZ4_favorDecompressionSpeed(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int favor);
+
+/*! LZ4_resetStreamHC_fast() : v1.9.0+
+ * When an LZ4_streamHC_t is known to be in an internally coherent state,
+ * it can often be prepared for a new compression with almost no work, only
+ * sometimes falling back to the full, expensive reset that is always required
+ * when the stream is in an indeterminate state (i.e., the reset performed by
+ * LZ4_resetStreamHC()).
+ *
+ * LZ4_streamHCs are guaranteed to be in a valid state when:
+ * - returned from LZ4_createStreamHC()
+ * - reset by LZ4_resetStreamHC()
+ * - memset(stream, 0, sizeof(LZ4_streamHC_t))
+ * - the stream was in a valid state and was reset by LZ4_resetStreamHC_fast()
+ * - the stream was in a valid state and was then used in any compression call
+ *   that returned success
+ * - the stream was in an indeterminate state and was used in a compression
+ *   call that fully reset the state (LZ4_compress_HC_extStateHC()) and that
+ *   returned success
+ *
+ * Note:
+ * A stream that was last used in a compression call that returned an error
+ * may be passed to this function. However, it will be fully reset, which will
+ * clear any existing history and settings from the context.
+ */
+LZ4LIB_STATIC_API void LZ4_resetStreamHC_fast(
+    LZ4_streamHC_t* LZ4_streamHCPtr, int compressionLevel);
+
+/*! LZ4_compress_HC_extStateHC_fastReset() :
+ * A variant of LZ4_compress_HC_extStateHC().
+ *
+ * Using this variant avoids an expensive initialization step. It is only safe
+ * to call if the state buffer is known to be correctly initialized already
+ * (see above comment on LZ4_resetStreamHC_fast() for a definition of
+ * "correctly initialized"). From a high level, the difference is that this
+ * function initializes the provided state with a call to
+ * LZ4_resetStreamHC_fast() while LZ4_compress_HC_extStateHC() starts with a
+ * call to LZ4_resetStreamHC().
+ */
+LZ4LIB_STATIC_API int LZ4_compress_HC_extStateHC_fastReset (
+    void* state,
+    const char* src, char* dst,
+    int srcSize, int dstCapacity,
+    int compressionLevel);
+
+/*! LZ4_attach_HC_dictionary() :
+ * This is an experimental API that allows for the efficient use of a
+ * static dictionary many times.
+ *
+ * Rather than re-loading the dictionary buffer into a working context before
+ * each compression, or copying a pre-loaded dictionary's LZ4_streamHC_t into a
+ * working LZ4_streamHC_t, this function introduces a no-copy setup mechanism,
+ * in which the working stream references the dictionary stream in-place.
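+ *
+ * A sketch of the intended pattern (stream and buffer names hypothetical) :
+ *
+ *     LZ4_loadDictHC(dictStream, dictBuf, dictSize);
+ *     LZ4_resetStreamHC_fast(workStream, LZ4HC_CLEVEL_DEFAULT);
+ *     LZ4_attach_HC_dictionary(workStream, dictStream);
+ *
+ * After this, workStream compresses while referencing dictStream's content,
+ * without copying it.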
+ * + * Several assumptions are made about the state of the dictionary stream. + * Currently, only streams which have been prepared by LZ4_loadDictHC() should + * be expected to work. + * + * Alternatively, the provided dictionary stream pointer may be NULL, in which + * case any existing dictionary stream is unset. + * + * A dictionary should only be attached to a stream without any history (i.e., + * a stream that has just been reset). + * + * The dictionary will remain attached to the working stream only for the + * current stream session. Calls to LZ4_resetStreamHC(_fast) will remove the + * dictionary context association from the working stream. The dictionary + * stream (and source buffer) must remain in-place / accessible / unchanged + * through the lifetime of the stream session. + */ +LZ4LIB_STATIC_API void LZ4_attach_HC_dictionary( + LZ4_streamHC_t *working_stream, + const LZ4_streamHC_t *dictionary_stream); + +#if defined (__cplusplus) +} +#endif + +#endif /* LZ4_HC_SLO_098092834 */ +#endif /* LZ4_HC_STATIC_LINKING_ONLY */ diff --git a/c-blosc/internal-complibs/zlib-1.3.1/adler32.c b/c-blosc/internal-complibs/zlib-1.3.1/adler32.c new file mode 100644 index 0000000..04b81d2 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/adler32.c @@ -0,0 +1,164 @@ +/* adler32.c -- compute the Adler-32 checksum of a data stream + * Copyright (C) 1995-2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zutil.h" + +#define BASE 65521U /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* use NO_DIVIDE if your processor does not do division in hardware -- + try it both ways to see which is faster */ +#ifdef NO_DIVIDE +/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 + (thank you to John Reiser for pointing this out) */ +# define CHOP(a) \ + do { \ + unsigned long tmp = a >> 16; \ + a &= 0xffffUL; \ + a += (tmp << 4) - tmp; \ + } while (0) +# define MOD28(a) \ + do { \ + CHOP(a); \ + if (a >= BASE) a -= BASE; \ + } while (0) +# define MOD(a) \ + do { \ + CHOP(a); \ + MOD28(a); \ + } while (0) +# define MOD63(a) \ + do { /* this assumes a is not negative */ \ + z_off64_t tmp = a >> 32; \ + a &= 0xffffffffL; \ + a += (tmp << 8) - (tmp << 5) + tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + tmp = a >> 16; \ + a &= 0xffffL; \ + a += (tmp << 4) - tmp; \ + if (a >= BASE) a -= BASE; \ + } while (0) +#else +# define MOD(a) a %= BASE +# define MOD28(a) a %= BASE +# define MOD63(a) a %= BASE +#endif + +/* ========================================================================= */ +uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, z_size_t len) { + unsigned long sum2; + unsigned n; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) { + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); + } + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it 
somewhat fast */ + if (len < 16) { + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + MOD28(sum2); /* only added so many BASE's */ + return adler | (sum2 << 16); + } + + /* do length NMAX blocks -- requires just one modulo operation */ + while (len >= NMAX) { + len -= NMAX; + n = NMAX / 16; /* NMAX is divisible by 16 */ + do { + DO16(buf); /* 16 sums unrolled */ + buf += 16; + } while (--n); + MOD(adler); + MOD(sum2); + } + + /* do remaining bytes (less than NMAX, still just one modulo) */ + if (len) { /* avoid modulos if none remaining */ + while (len >= 16) { + len -= 16; + DO16(buf); + buf += 16; + } + while (len--) { + adler += *buf++; + sum2 += adler; + } + MOD(adler); + MOD(sum2); + } + + /* return recombined sums */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len) { + return adler32_z(adler, buf, len); +} + +/* ========================================================================= */ +local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2) { + unsigned long sum1; + unsigned long sum2; + unsigned rem; + + /* for negative len, return invalid adler32 as a clue for debugging */ + if (len2 < 0) + return 0xffffffffUL; + + /* the derivation of this formula is left as an exercise for the reader */ + MOD63(len2); /* assumes len2 >= 0 */ + rem = (unsigned)len2; + sum1 = adler1 & 0xffff; + sum2 = rem * sum1; + MOD(sum2); + sum1 += (adler2 & 0xffff) + BASE - 1; + sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; + if (sum1 >= BASE) sum1 -= BASE; + if (sum1 >= BASE) sum1 -= BASE; + if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); + if (sum2 >= BASE) sum2 -= BASE; + return sum1 | (sum2 << 16); +} + +/* ========================================================================= */ +uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, z_off_t len2) { + return adler32_combine_(adler1, adler2, len2); +} + +uLong ZEXPORT adler32_combine64(uLong adler1, uLong adler2, z_off64_t len2) { + return adler32_combine_(adler1, adler2, len2); +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/compress.c b/c-blosc/internal-complibs/zlib-1.3.1/compress.c new file mode 100644 index 0000000..f43bacf --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/compress.c @@ -0,0 +1,75 @@ +/* compress.c -- compress a memory buffer + * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least 0.1% larger than sourceLen plus + 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. 
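+
+   A minimal usage sketch (buffer names hypothetical) :
+
+     uLongf destLen = compressBound(sourceLen);
+     Bytef *dest = malloc(destLen);
+     int err = compress2(dest, &destLen, source, sourceLen, 6);
+
+   On success (Z_OK), destLen holds the compressed size.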
+*/ +int ZEXPORT compress2(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong sourceLen, int level) { + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong left; + + left = *destLen; + *destLen = 0; + + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = deflateInit(&stream, level); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; + sourceLen -= stream.avail_in; + } + err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); + } while (err == Z_OK); + + *destLen = stream.total_out; + deflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : err; +} + +/* =========================================================================== + */ +int ZEXPORT compress(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong sourceLen) { + return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); +} + +/* =========================================================================== + If the default memLevel or windowBits for deflateInit() is changed, then + this function needs to be updated. + */ +uLong ZEXPORT compressBound(uLong sourceLen) { + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.c new file mode 100644 index 0000000..e6e6590 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.c @@ -0,0 +1,466 @@ +/* blast.c + * Copyright (C) 2003, 2012, 2013 Mark Adler + * For conditions of distribution and use, see copyright notice in blast.h + * version 1.3, 24 Aug 2013 + * + * blast.c decompresses data compressed by the PKWare Compression Library. + * This function provides functionality similar to the explode() function of + * the PKWare library, hence the name "blast". + * + * This decompressor is based on the excellent format description provided by + * Ben Rudiak-Gould in comp.compression on August 13, 2001. Interestingly, the + * example Ben provided in the post is incorrect. The distance 110001 should + * instead be 111000. When corrected, the example byte stream becomes: + * + * 00 04 82 24 25 8f 80 7f + * + * which decompresses to "AIAIAIAIAIAIA" (without the quotes). 
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0  12 Feb 2003  - First version
+ * 1.1  16 Feb 2003  - Fixed distance check for > 4 GB uncompressed data
+ * 1.2  24 Oct 2012  - Add note about using binary mode in stdio
+ *                   - Fix comparisons of differently signed integers
+ * 1.3  24 Aug 2013  - Return unused input from blast()
+ *                   - Fix test code to correctly report unused input
+ *                   - Enable the provision of initial input to blast()
+ */
+
+#include <stddef.h>             /* for NULL */
+#include <setjmp.h>             /* for setjmp(), longjmp(), and jmp_buf */
+#include "blast.h"              /* prototype for blast() */
+
+#define local static            /* for local function definitions */
+#define MAXBITS 13              /* maximum code length */
+#define MAXWIN 4096             /* maximum window size */
+
+/* input and output state */
+struct state {
+    /* input state */
+    blast_in infun;             /* input function provided by user */
+    void *inhow;                /* opaque information passed to infun() */
+    unsigned char *in;          /* next input location */
+    unsigned left;              /* available input at in */
+    int bitbuf;                 /* bit buffer */
+    int bitcnt;                 /* number of bits in bit buffer */
+
+    /* input limit error return state for bits() and decode() */
+    jmp_buf env;
+
+    /* output state */
+    blast_out outfun;           /* output function provided by user */
+    void *outhow;               /* opaque information passed to outfun() */
+    unsigned next;              /* index of next write location in out[] */
+    int first;                  /* true to check distances (for first 4K) */
+    unsigned char out[MAXWIN];  /* output buffer and sliding window */
+};
+
+/*
+ * Return need bits from the input stream.  This always leaves less than
+ * eight bits in the buffer.  bits() works properly for need == 0.
+ *
+ * Format notes:
+ *
+ * - Bits are stored in bytes from the least significant bit to the most
+ *   significant bit.  Therefore bits are dropped from the bottom of the bit
+ *   buffer, using shift right, and new bytes are appended to the top of the
+ *   bit buffer, using shift left.
+ */
+local int bits(struct state *s, int need)
+{
+    int val;            /* bit accumulator */
+
+    /* load at least need bits into val */
+    val = s->bitbuf;
+    while (s->bitcnt < need) {
+        if (s->left == 0) {
+            s->left = s->infun(s->inhow, &(s->in));
+            if (s->left == 0) longjmp(s->env, 1);       /* out of input */
+        }
+        val |= (int)(*(s->in)++) << s->bitcnt;          /* load eight bits */
+        s->left--;
+        s->bitcnt += 8;
+    }
+
+    /* drop need bits and update buffer, always zero to seven bits left */
+    s->bitbuf = val >> need;
+    s->bitcnt -= need;
+
+    /* return need bits, zeroing the bits above that */
+    return val & ((1 << need) - 1);
+}
+
+/*
+ * Huffman code decoding tables.  count[1..MAXBITS] is the number of symbols of
+ * each length, which for a canonical code are stepped through in order.
+ * symbol[] are the symbol values in canonical order, where the number of
+ * entries is the sum of the counts in count[].  The decoding process can be
+ * seen in the function decode() below.
+ */
+struct huffman {
+    short *count;       /* number of symbols of each length */
+    short *symbol;      /* canonically ordered symbols */
+};
+
+/*
+ * Decode a code from the stream s using huffman table h.  Return the symbol or
+ * a negative value if there is an error.  If all of the lengths are zero, i.e.
+ * an empty code, or if the code is incomplete and an invalid code is received,
+ * then -9 is returned after reading MAXBITS bits.
+ *
+ * Format notes:
+ *
+ * - The codes as stored in the compressed data are bit-reversed relative to
+ *   a simple integer ordering of codes of the same lengths.
Hence below the + * bits are pulled from the compressed data one at a time and used to + * build the code value reversed from what is in the stream in order to + * permit simple integer comparisons for decoding. + * + * - The first code for the shortest length is all ones. Subsequent codes of + * the same length are simply integer decrements of the previous code. When + * moving up a length, a one bit is appended to the code. For a complete + * code, the last code of the longest length will be all zeros. To support + * this ordering, the bits pulled during decoding are inverted to apply the + * more "natural" ordering starting with all zeros and incrementing. + */ +local int decode(struct state *s, struct huffman *h) +{ + int len; /* current number of bits in code */ + int code; /* len bits being decoded */ + int first; /* first code of length len */ + int count; /* number of codes of length len */ + int index; /* index of first code of length len in symbol table */ + int bitbuf; /* bits from stream */ + int left; /* bits left in next or left to process */ + short *next; /* next number of codes */ + + bitbuf = s->bitbuf; + left = s->bitcnt; + code = first = index = 0; + len = 1; + next = h->count + 1; + while (1) { + while (left--) { + code |= (bitbuf & 1) ^ 1; /* invert code */ + bitbuf >>= 1; + count = *next++; + if (code < first + count) { /* if length len, return symbol */ + s->bitbuf = bitbuf; + s->bitcnt = (s->bitcnt - len) & 7; + return h->symbol[index + (code - first)]; + } + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + len++; + } + left = (MAXBITS+1) - len; + if (left == 0) break; + if (s->left == 0) { + s->left = s->infun(s->inhow, &(s->in)); + if (s->left == 0) longjmp(s->env, 1); /* out of input */ + } + bitbuf = *(s->in)++; + s->left--; + if (left > 8) left = 8; + } + return -9; /* ran out of codes */ +} + +/* + * Given a list of repeated code lengths rep[0..n-1], where each byte is a + * count (high four bits + 1) and a code length (low four bits), generate the + * list of code lengths. This compaction reduces the size of the object code. + * Then given the list of code lengths length[0..n-1] representing a canonical + * Huffman code for n symbols, construct the tables required to decode those + * codes. Those tables are the number of codes of each length, and the symbols + * sorted by length, retaining their original order within each length. The + * return value is zero for a complete code set, negative for an over- + * subscribed code set, and positive for an incomplete code set. The tables + * can be used if the return value is zero or positive, but they cannot be used + * if the return value is negative. If the return value is zero, it is not + * possible for decode() using that table to return an error--any stream of + * enough bits will resolve to a symbol. If the return value is positive, then + * it is possible for decode() using that table to return an error for received + * codes past the end of the incomplete lengths. 
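+
+ * As a worked illustration (added here, not in the original source): a
+ * rep[] byte of 0x25 has high four bits 2 and low four bits 5, so it
+ * expands to 2 + 1 = 3 consecutive symbols of code length 5.  The litlen[],
+ * lenlen[] and distlen[] tables in decomp() below are stored in exactly
+ * this compact form.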
+ */ +local int construct(struct huffman *h, const unsigned char *rep, int n) +{ + int symbol; /* current symbol when stepping through length[] */ + int len; /* current length when stepping through h->count[] */ + int left; /* number of possible codes left of current length */ + short offs[MAXBITS+1]; /* offsets in symbol table for each length */ + short length[256]; /* code lengths */ + + /* convert compact repeat counts into symbol bit length list */ + symbol = 0; + do { + len = *rep++; + left = (len >> 4) + 1; + len &= 15; + do { + length[symbol++] = len; + } while (--left); + } while (--n); + n = symbol; + + /* count number of codes of each length */ + for (len = 0; len <= MAXBITS; len++) + h->count[len] = 0; + for (symbol = 0; symbol < n; symbol++) + (h->count[length[symbol]])++; /* assumes lengths are within bounds */ + if (h->count[0] == n) /* no codes! */ + return 0; /* complete, but decode() will fail */ + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; /* one possible code of zero length */ + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; /* one more bit, double codes left */ + left -= h->count[len]; /* deduct count from possible codes */ + if (left < 0) return left; /* over-subscribed--return negative */ + } /* left > 0 means incomplete */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + h->count[len]; + + /* + * put symbols in table sorted by length, by symbol order within each + * length + */ + for (symbol = 0; symbol < n; symbol++) + if (length[symbol] != 0) + h->symbol[offs[length[symbol]]++] = symbol; + + /* return zero for complete set, positive for incomplete set */ + return left; +} + +/* + * Decode PKWare Compression Library stream. + * + * Format notes: + * + * - First byte is 0 if literals are uncoded or 1 if they are coded. Second + * byte is 4, 5, or 6 for the number of extra bits in the distance code. + * This is the base-2 logarithm of the dictionary size minus six. + * + * - Compressed data is a combination of literals and length/distance pairs + * terminated by an end code. Literals are either Huffman coded or + * uncoded bytes. A length/distance pair is a coded length followed by a + * coded distance to represent a string that occurs earlier in the + * uncompressed data that occurs again at the current location. + * + * - A bit preceding a literal or length/distance pair indicates which comes + * next, 0 for literals, 1 for length/distance. + * + * - If literals are uncoded, then the next eight bits are the literal, in the + * normal bit order in the stream, i.e. no bit-reversal is needed. Similarly, + * no bit reversal is needed for either the length extra bits or the distance + * extra bits. + * + * - Literal bytes are simply written to the output. A length/distance pair is + * an instruction to copy previously uncompressed bytes to the output. The + * copy is from distance bytes back in the output stream, copying for length + * bytes. + * + * - Distances pointing before the beginning of the output data are not + * permitted. + * + * - Overlapped copies, where the length is greater than the distance, are + * allowed and common. For example, a distance of one and a length of 518 + * simply copies the last byte 518 times. A distance of four and a length of + * twelve copies the last four bytes three times. 
A simple forward copy + * ignoring whether the length is greater than the distance or not implements + * this correctly. + */ +local int decomp(struct state *s) +{ + int lit; /* true if literals are coded */ + int dict; /* log2(dictionary size) - 6 */ + int symbol; /* decoded symbol, extra bits for distance */ + int len; /* length for copy */ + unsigned dist; /* distance for copy */ + int copy; /* copy counter */ + unsigned char *from, *to; /* copy pointers */ + static int virgin = 1; /* build tables once */ + static short litcnt[MAXBITS+1], litsym[256]; /* litcode memory */ + static short lencnt[MAXBITS+1], lensym[16]; /* lencode memory */ + static short distcnt[MAXBITS+1], distsym[64]; /* distcode memory */ + static struct huffman litcode = {litcnt, litsym}; /* length code */ + static struct huffman lencode = {lencnt, lensym}; /* length code */ + static struct huffman distcode = {distcnt, distsym};/* distance code */ + /* bit lengths of literal codes */ + static const unsigned char litlen[] = { + 11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8, + 9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5, + 7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12, + 8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27, + 44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45, + 44, 173}; + /* bit lengths of length codes 0..15 */ + static const unsigned char lenlen[] = {2, 35, 36, 53, 38, 23}; + /* bit lengths of distance codes 0..63 */ + static const unsigned char distlen[] = {2, 20, 53, 230, 247, 151, 248}; + static const short base[16] = { /* base for length codes */ + 3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264}; + static const char extra[16] = { /* extra bits for length codes */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8}; + + /* set up decoding tables (once--might not be thread-safe) */ + if (virgin) { + construct(&litcode, litlen, sizeof(litlen)); + construct(&lencode, lenlen, sizeof(lenlen)); + construct(&distcode, distlen, sizeof(distlen)); + virgin = 0; + } + + /* read header */ + lit = bits(s, 8); + if (lit > 1) return -1; + dict = bits(s, 8); + if (dict < 4 || dict > 6) return -2; + + /* decode literals and length/distance pairs */ + do { + if (bits(s, 1)) { + /* get length */ + symbol = decode(s, &lencode); + len = base[symbol] + bits(s, extra[symbol]); + if (len == 519) break; /* end code */ + + /* get distance */ + symbol = len == 2 ? 2 : dict; + dist = decode(s, &distcode) << symbol; + dist += bits(s, symbol); + dist++; + if (s->first && dist > s->next) + return -3; /* distance too far back */ + + /* copy length bytes from distance bytes back */ + do { + to = s->out + s->next; + from = to - dist; + copy = MAXWIN; + if (s->next < dist) { + from += copy; + copy = dist; + } + copy -= s->next; + if (copy > len) copy = len; + len -= copy; + s->next += copy; + do { + *to++ = *from++; + } while (--copy); + if (s->next == MAXWIN) { + if (s->outfun(s->outhow, s->out, s->next)) return 1; + s->next = 0; + s->first = 0; + } + } while (len != 0); + } + else { + /* get literal and write it */ + symbol = lit ? 
decode(s, &litcode) : bits(s, 8);
+            s->out[s->next++] = symbol;
+            if (s->next == MAXWIN) {
+                if (s->outfun(s->outhow, s->out, s->next)) return 1;
+                s->next = 0;
+                s->first = 0;
+            }
+        }
+    } while (1);
+    return 0;
+}
+
+/* See comments in blast.h */
+int blast(blast_in infun, void *inhow, blast_out outfun, void *outhow,
+          unsigned *left, unsigned char **in)
+{
+    struct state s;             /* input/output state */
+    int err;                    /* return value */
+
+    /* initialize input state */
+    s.infun = infun;
+    s.inhow = inhow;
+    if (left != NULL && *left) {
+        s.left = *left;
+        s.in = *in;
+    }
+    else
+        s.left = 0;
+    s.bitbuf = 0;
+    s.bitcnt = 0;
+
+    /* initialize output state */
+    s.outfun = outfun;
+    s.outhow = outhow;
+    s.next = 0;
+    s.first = 1;
+
+    /* return if bits() or decode() tries to read past available input */
+    if (setjmp(s.env) != 0)             /* if came back here via longjmp(), */
+        err = 2;                        /* then skip decomp(), return error */
+    else
+        err = decomp(&s);               /* decompress */
+
+    /* return unused input */
+    if (left != NULL)
+        *left = s.left;
+    if (in != NULL)
+        *in = s.left ? s.in : NULL;
+
+    /* write any leftover output and update the error code if needed */
+    if (err != 1 && s.next && s.outfun(s.outhow, s.out, s.next) && err == 0)
+        err = 1;
+    return err;
+}
+
+#ifdef TEST
+/* Example of how to use blast() */
+#include <stdio.h>
+#include <stdlib.h>
+
+#define CHUNK 16384
+
+local unsigned inf(void *how, unsigned char **buf)
+{
+    static unsigned char hold[CHUNK];
+
+    *buf = hold;
+    return fread(hold, 1, CHUNK, (FILE *)how);
+}
+
+local int outf(void *how, unsigned char *buf, unsigned len)
+{
+    return fwrite(buf, 1, len, (FILE *)how) != len;
+}
+
+/* Decompress a PKWare Compression Library stream from stdin to stdout */
+int main(void)
+{
+    int ret;
+    unsigned left;
+
+    /* decompress to stdout */
+    left = 0;
+    ret = blast(inf, stdin, outf, stdout, &left, NULL);
+    if (ret != 0)
+        fprintf(stderr, "blast error: %d\n", ret);
+
+    /* count any leftover bytes */
+    while (getchar() != EOF)
+        left++;
+    if (left)
+        fprintf(stderr, "blast warning: %u unused bytes of input\n", left);
+
+    /* return blast() error code */
+    return ret;
+}
+#endif
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.h
new file mode 100644
index 0000000..ef8544c
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/blast/blast.h
@@ -0,0 +1,83 @@
+/* blast.h -- interface for blast.c
+  Copyright (C) 2003, 2012, 2013 Mark Adler
+  version 1.3, 24 Aug 2013
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the author be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler    madler@alumni.caltech.edu
+ */
+
+
+/*
+ * blast() decompresses the PKWare Data Compression Library (DCL) compressed
+ * format.
It provides the same functionality as the explode() function in + * that library. (Note: PKWare overused the "implode" verb, and the format + * used by their library implode() function is completely different and + * incompatible with the implode compression method supported by PKZIP.) + * + * The binary mode for stdio functions should be used to assure that the + * compressed data is not corrupted when read or written. For example: + * fopen(..., "rb") and fopen(..., "wb"). + */ + + +typedef unsigned (*blast_in)(void *how, unsigned char **buf); +typedef int (*blast_out)(void *how, unsigned char *buf, unsigned len); +/* Definitions for input/output functions passed to blast(). See below for + * what the provided functions need to do. + */ + + +int blast(blast_in infun, void *inhow, blast_out outfun, void *outhow, + unsigned *left, unsigned char **in); +/* Decompress input to output using the provided infun() and outfun() calls. + * On success, the return value of blast() is zero. If there is an error in + * the source data, i.e. it is not in the proper format, then a negative value + * is returned. If there is not enough input available or there is not enough + * output space, then a positive error is returned. + * + * The input function is invoked: len = infun(how, &buf), where buf is set by + * infun() to point to the input buffer, and infun() returns the number of + * available bytes there. If infun() returns zero, then blast() returns with + * an input error. (blast() only asks for input if it needs it.) inhow is for + * use by the application to pass an input descriptor to infun(), if desired. + * + * If left and in are not NULL and *left is not zero when blast() is called, + * then the *left bytes at *in are consumed for input before infun() is used. + * + * The output function is invoked: err = outfun(how, buf, len), where the bytes + * to be written are buf[0..len-1]. If err is not zero, then blast() returns + * with an output error. outfun() is always called with len <= 4096. outhow + * is for use by the application to pass an output descriptor to outfun(), if + * desired. + * + * If there is any unused input, *left is set to the number of bytes that were + * read and *in points to them. Otherwise *left is set to zero and *in is set + * to NULL. If left or in are NULL, then they are not set. + * + * The return codes are: + * + * 2: ran out of input before completing decompression + * 1: output error before completing decompression + * 0: successful decompression + * -1: literal flag not zero or one + * -2: dictionary size not in 4..6 + * -3: distance is too far back + * + * At the bottom of blast.c is an example program that uses blast() that can be + * compiled to produce a command-line decompression filter by defining TEST. + */ diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.c new file mode 100644 index 0000000..742a392 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.c @@ -0,0 +1,603 @@ +/* infback9.c -- inflate deflate64 data using a call-back interface + * Copyright (C) 1995-2008 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "infback9.h" +#include "inftree9.h" +#include "inflate9.h" + +#define WSIZE 65536UL + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. 
+
+   window is a user-supplied window and output buffer that is 64K bytes.
+ */
+int ZEXPORT inflateBack9Init_(z_stream FAR *strm, unsigned char FAR *window,
+                              const char *version, int stream_size) {
+    struct inflate_state FAR *state;
+
+    if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
+        stream_size != (int)(sizeof(z_stream)))
+        return Z_VERSION_ERROR;
+    if (strm == Z_NULL || window == Z_NULL)
+        return Z_STREAM_ERROR;
+    strm->msg = Z_NULL;                 /* in case we return an error */
+    if (strm->zalloc == (alloc_func)0) {
+        strm->zalloc = zcalloc;
+        strm->opaque = (voidpf)0;
+    }
+    if (strm->zfree == (free_func)0) strm->zfree = zcfree;
+    state = (struct inflate_state FAR *)ZALLOC(strm, 1,
+                                               sizeof(struct inflate_state));
+    if (state == Z_NULL) return Z_MEM_ERROR;
+    Tracev((stderr, "inflate: allocated\n"));
+    strm->state = (voidpf)state;
+    state->window = window;
+    return Z_OK;
+}
+
+/*
+   Build and output length and distance decoding tables for fixed code
+   decoding.
+ */
+#ifdef MAKEFIXED
+#include <stdio.h>
+
+void makefixed9(void) {
+    unsigned sym, bits, low, size;
+    code *next, *lenfix, *distfix;
+    struct inflate_state state;
+    code fixed[544];
+
+    /* literal/length table */
+    sym = 0;
+    while (sym < 144) state.lens[sym++] = 8;
+    while (sym < 256) state.lens[sym++] = 9;
+    while (sym < 280) state.lens[sym++] = 7;
+    while (sym < 288) state.lens[sym++] = 8;
+    next = fixed;
+    lenfix = next;
+    bits = 9;
+    inflate_table9(LENS, state.lens, 288, &(next), &(bits), state.work);
+
+    /* distance table */
+    sym = 0;
+    while (sym < 32) state.lens[sym++] = 5;
+    distfix = next;
+    bits = 5;
+    inflate_table9(DISTS, state.lens, 32, &(next), &(bits), state.work);
+
+    /* write tables */
+    puts("    /* inffix9.h -- table for decoding deflate64 fixed codes");
+    puts("     * Generated automatically by makefixed9().");
+    puts("     */");
+    puts("");
+    puts("    /* WARNING: this file should *not* be used by applications.");
+    puts("       It is part of the implementation of this library and is");
+    puts("       subject to change. Applications should only use zlib.h.");
+    puts("     */");
+    puts("");
+    size = 1U << 9;
+    printf("    static const code lenfix[%u] = {", size);
+    low = 0;
+    for (;;) {
+        if ((low % 6) == 0) printf("\n        ");
+        printf("{%u,%u,%d}", lenfix[low].op, lenfix[low].bits,
+               lenfix[low].val);
+        if (++low == size) break;
+        putchar(',');
+    }
+    puts("\n    };");
+    size = 1U << 5;
+    printf("\n    static const code distfix[%u] = {", size);
+    low = 0;
+    for (;;) {
+        if ((low % 5) == 0) printf("\n        ");
+        printf("{%u,%u,%d}", distfix[low].op, distfix[low].bits,
+               distfix[low].val);
+        if (++low == size) break;
+        putchar(',');
+    }
+    puts("\n    };");
+}
+#endif /* MAKEFIXED */
+
+/* Macros for inflateBack(): */
+
+/* Clear the input bit accumulator */
+#define INITBITS() \
+    do { \
+        hold = 0; \
+        bits = 0; \
+    } while (0)
+
+/* Assure that some input is available.  If input is requested, but denied,
+   then return a Z_BUF_ERROR from inflateBack(). */
+#define PULL() \
+    do { \
+        if (have == 0) { \
+            have = in(in_desc, &next); \
+            if (have == 0) { \
+                next = Z_NULL; \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflateBack()
+   with an error if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        PULL(); \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator.  If there is
+   not enough available input to do that, then return from inflateBack() with
+   an error.
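+
+   (An illustrative trace, added for this text: with hold == 0x5 and
+   bits == 3, PULLBYTE() of an input byte 0xA4 leaves hold == 0x525 and
+   bits == 11, after which BITS(6) would return 0x25 and DROPBITS(6)
+   would leave hold == 0x14 and bits == 5.)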
+ */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n <= 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Assure that some output space is available, by writing out the window
+   if it's full.  If the write fails, return from inflateBack() with a
+   Z_BUF_ERROR. */
+#define ROOM() \
+    do { \
+        if (left == 0) { \
+            put = window; \
+            left = WSIZE; \
+            wrap = 1; \
+            if (out(out_desc, put, (unsigned)left)) { \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/*
+   strm provides the memory allocation functions and window buffer on input,
+   and provides information on the unused input on return.  For Z_DATA_ERROR
+   returns, strm will also provide an error message.
+
+   in() and out() are the call-back input and output functions.  When
+   inflateBack() needs more input, it calls in().  When inflateBack() has
+   filled the window with output, or when it completes with data in the
+   window, it calls out() to write out the data.  The application must not
+   change the provided input until in() is called again or inflateBack()
+   returns.  The application must not change the window/output buffer until
+   inflateBack() returns.
+
+   in() and out() are called with a descriptor parameter provided in the
+   inflateBack() call.  This parameter can be a structure that provides the
+   information required to do the read or write, as well as accumulated
+   information on the input and output such as totals and check values.
+
+   in() should return zero on failure.  out() should return non-zero on
+   failure.  If either in() or out() fails, then inflateBack() returns a
+   Z_BUF_ERROR.  strm->next_in can be checked for Z_NULL to see whether it
+   was in() or out() that caused the error.  Otherwise, inflateBack()
+   returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format
+   error, or Z_MEM_ERROR if it could not allocate memory for the state.
+   inflateBack() can also return Z_STREAM_ERROR if the input parameters
+   are not correct, i.e. strm is Z_NULL or the state was not initialized.
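+
+   A hedged usage sketch (added for this text, not part of zlib; the file
+   descriptors and buffer handling are placeholders patterned on the TEST
+   driver in blast.c):
+
+     static unsigned in(void *desc, z_const unsigned char **buf) {
+         static unsigned char hold[16384];
+         *buf = hold;
+         return fread(hold, 1, sizeof(hold), (FILE *)desc);
+     }
+     static int out(void *desc, unsigned char *buf, unsigned len) {
+         return fwrite(buf, 1, len, (FILE *)desc) != len;
+     }
+     ...
+     z_stream strm;
+     static unsigned char window[65536];    the required 64K window
+     strm.zalloc = Z_NULL;                  use the library allocators
+     strm.zfree = Z_NULL;
+     strm.next_in = Z_NULL;                 no initial input
+     if (inflateBack9Init(&strm, window) == Z_OK) {
+         int ret = inflateBack9(&strm, in, infile, out, outfile);
+         inflateBack9End(&strm);
+     }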
+ */ +int ZEXPORT inflateBack9(z_stream FAR *strm, in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc) { + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have; /* available input */ + unsigned long left; /* available output */ + inflate_mode mode; /* current inflate mode */ + int lastblock; /* true if processing last block */ + int wrap; /* true if the window has wrapped */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned extra; /* extra bits needed */ + unsigned long length; /* literal or length of data to copy */ + unsigned long offset; /* distance back to copy string from */ + unsigned long copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; +#include "inffix9.h" + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + mode = TYPE; + lastblock = 0; + wrap = 0; + window = state->window; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = window; + left = WSIZE; + lencode = Z_NULL; + distcode = Z_NULL; + + /* Inflate until end of block marked as last */ + for (;;) + switch (mode) { + case TYPE: + /* determine and dispatch block type */ + if (lastblock) { + BYTEBITS(); + mode = DONE; + break; + } + NEEDBITS(3); + lastblock = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + lastblock ? " (last)" : "")); + mode = STORED; + break; + case 1: /* fixed block */ + lencode = lenfix; + lenbits = 9; + distcode = distfix; + distbits = 5; + Tracev((stderr, "inflate: fixed codes block%s\n", + lastblock ? " (last)" : "")); + mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + lastblock ? 
" (last)" : "")); + mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + mode = BAD; + break; + } + length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %lu\n", + length)); + INITBITS(); + + /* copy stored block from input to output */ + while (length != 0) { + copy = length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); + if (state->nlen > 286) { + strm->msg = (char *)"too many length symbols"; + mode = BAD; + break; + } + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + lencode = (code const FAR *)(state->next); + lenbits = 7; + ret = inflate_table9(CODES, state->lens, 19, &(state->next), + &(lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = lencode[BITS(lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + NEEDBITS(here.bits); + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftree9.h + concerning the ENOUGH constants, which depend on those values */ + state->next = state->codes; + lencode = (code const FAR *)(state->next); + lenbits = 9; + ret = inflate_table9(LENS, state->lens, state->nlen, + &(state->next), &(lenbits), state->work); + if (ret) { + 
strm->msg = (char *)"invalid literal/lengths set"; + mode = BAD; + break; + } + distcode = (code const FAR *)(state->next); + distbits = 6; + ret = inflate_table9(DISTS, state->lens + state->nlen, + state->ndist, &(state->next), &(distbits), + state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + mode = LEN; + + case LEN: + /* get a literal, length, or end-of-block code */ + for (;;) { + here = lencode[BITS(lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + length = (unsigned)here.val; + + /* process literal */ + if (here.op == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + ROOM(); + *put++ = (unsigned char)(length); + left--; + mode = LEN; + break; + } + + /* process end of block */ + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + mode = TYPE; + break; + } + + /* invalid code */ + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + extra = (unsigned)(here.op) & 31; + if (extra != 0) { + NEEDBITS(extra); + length += BITS(extra); + DROPBITS(extra); + } + Tracevv((stderr, "inflate: length %lu\n", length)); + + /* get distance code */ + for (;;) { + here = distcode[BITS(distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + mode = BAD; + break; + } + offset = (unsigned)here.val; + + /* get distance extra bits, if any */ + extra = (unsigned)(here.op) & 15; + if (extra != 0) { + NEEDBITS(extra); + offset += BITS(extra); + DROPBITS(extra); + } + if (offset > WSIZE - (wrap ? 
0: left)) { + strm->msg = (char *)"invalid distance too far back"; + mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %lu\n", offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = WSIZE - offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - offset; + copy = left; + } + if (copy > length) copy = length; + length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (length != 0); + break; + + case DONE: + /* inflate stream terminated properly -- write leftover output */ + ret = Z_STREAM_END; + if (left < WSIZE) { + if (out(out_desc, window, (unsigned)(WSIZE - left))) + ret = Z_BUF_ERROR; + } + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Return unused input */ + inf_leave: + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBack9End(z_stream FAR *strm) { + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.h new file mode 100644 index 0000000..8371b4e --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/infback9.h @@ -0,0 +1,37 @@ +/* infback9.h -- header for using inflateBack9 functions + * Copyright (C) 2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * This header file and associated patches provide a decoder for PKWare's + * undocumented deflate64 compression method (method 9). Use with infback9.c, + * inftree9.h, inftree9.c, and inffix9.h. These patches are not supported. + * This should be compiled with zlib, since it uses zutil.h and zutil.o. + * This code has not yet been tested on 16-bit architectures. See the + * comments in zlib.h for inflateBack() usage. These functions are used + * identically, except that there is no windowBits parameter, and a 64K + * window must be provided. Also if int's are 16 bits, then a zero for + * the third parameter of the "out" function actually means 65536UL. + * zlib.h must be included before this header file. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +ZEXTERN int ZEXPORT inflateBack9(z_stream FAR *strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc); +ZEXTERN int ZEXPORT inflateBack9End(z_stream FAR *strm); +ZEXTERN int ZEXPORT inflateBack9Init_(z_stream FAR *strm, + unsigned char FAR *window, + const char *version, + int stream_size); +#define inflateBack9Init(strm, window) \ + inflateBack9Init_((strm), (window), \ + ZLIB_VERSION, sizeof(z_stream)) + +#ifdef __cplusplus +} +#endif diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inffix9.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inffix9.h new file mode 100644 index 0000000..ee5671d --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inffix9.h @@ -0,0 +1,107 @@ + /* inffix9.h -- table for decoding deflate64 fixed codes + * Generated automatically by makefixed9(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. 
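+
+    (These fixed-code tables can be regenerated by compiling infback9.c
+    with MAKEFIXED defined and calling makefixed9(); see the MAKEFIXED
+    section of infback9.c.)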
+ */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{132,8,115},{130,7,31},{0,8,112}, + {0,8,48},{0,9,192},{128,7,10},{0,8,96},{0,8,32},{0,9,160}, + {0,8,0},{0,8,128},{0,8,64},{0,9,224},{128,7,6},{0,8,88}, + {0,8,24},{0,9,144},{131,7,59},{0,8,120},{0,8,56},{0,9,208}, + {129,7,17},{0,8,104},{0,8,40},{0,9,176},{0,8,8},{0,8,136}, + {0,8,72},{0,9,240},{128,7,4},{0,8,84},{0,8,20},{133,8,227}, + {131,7,43},{0,8,116},{0,8,52},{0,9,200},{129,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232}, + {128,7,8},{0,8,92},{0,8,28},{0,9,152},{132,7,83},{0,8,124}, + {0,8,60},{0,9,216},{130,7,23},{0,8,108},{0,8,44},{0,9,184}, + {0,8,12},{0,8,140},{0,8,76},{0,9,248},{128,7,3},{0,8,82}, + {0,8,18},{133,8,163},{131,7,35},{0,8,114},{0,8,50},{0,9,196}, + {129,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2},{0,8,130}, + {0,8,66},{0,9,228},{128,7,7},{0,8,90},{0,8,26},{0,9,148}, + {132,7,67},{0,8,122},{0,8,58},{0,9,212},{130,7,19},{0,8,106}, + {0,8,42},{0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244}, + {128,7,5},{0,8,86},{0,8,22},{65,8,0},{131,7,51},{0,8,118}, + {0,8,54},{0,9,204},{129,7,15},{0,8,102},{0,8,38},{0,9,172}, + {0,8,6},{0,8,134},{0,8,70},{0,9,236},{128,7,9},{0,8,94}, + {0,8,30},{0,9,156},{132,7,99},{0,8,126},{0,8,62},{0,9,220}, + {130,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{133,8,131}, + {130,7,31},{0,8,113},{0,8,49},{0,9,194},{128,7,10},{0,8,97}, + {0,8,33},{0,9,162},{0,8,1},{0,8,129},{0,8,65},{0,9,226}, + {128,7,6},{0,8,89},{0,8,25},{0,9,146},{131,7,59},{0,8,121}, + {0,8,57},{0,9,210},{129,7,17},{0,8,105},{0,8,41},{0,9,178}, + {0,8,9},{0,8,137},{0,8,73},{0,9,242},{128,7,4},{0,8,85}, + {0,8,21},{144,8,3},{131,7,43},{0,8,117},{0,8,53},{0,9,202}, + {129,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133}, + {0,8,69},{0,9,234},{128,7,8},{0,8,93},{0,8,29},{0,9,154}, + {132,7,83},{0,8,125},{0,8,61},{0,9,218},{130,7,23},{0,8,109}, + {0,8,45},{0,9,186},{0,8,13},{0,8,141},{0,8,77},{0,9,250}, + {128,7,3},{0,8,83},{0,8,19},{133,8,195},{131,7,35},{0,8,115}, + {0,8,51},{0,9,198},{129,7,11},{0,8,99},{0,8,35},{0,9,166}, + {0,8,3},{0,8,131},{0,8,67},{0,9,230},{128,7,7},{0,8,91}, + {0,8,27},{0,9,150},{132,7,67},{0,8,123},{0,8,59},{0,9,214}, + {130,7,19},{0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139}, + {0,8,75},{0,9,246},{128,7,5},{0,8,87},{0,8,23},{77,8,0}, + {131,7,51},{0,8,119},{0,8,55},{0,9,206},{129,7,15},{0,8,103}, + {0,8,39},{0,9,174},{0,8,7},{0,8,135},{0,8,71},{0,9,238}, + {128,7,9},{0,8,95},{0,8,31},{0,9,158},{132,7,99},{0,8,127}, + {0,8,63},{0,9,222},{130,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80}, + {0,8,16},{132,8,115},{130,7,31},{0,8,112},{0,8,48},{0,9,193}, + {128,7,10},{0,8,96},{0,8,32},{0,9,161},{0,8,0},{0,8,128}, + {0,8,64},{0,9,225},{128,7,6},{0,8,88},{0,8,24},{0,9,145}, + {131,7,59},{0,8,120},{0,8,56},{0,9,209},{129,7,17},{0,8,104}, + {0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72},{0,9,241}, + {128,7,4},{0,8,84},{0,8,20},{133,8,227},{131,7,43},{0,8,116}, + {0,8,52},{0,9,201},{129,7,13},{0,8,100},{0,8,36},{0,9,169}, + {0,8,4},{0,8,132},{0,8,68},{0,9,233},{128,7,8},{0,8,92}, + {0,8,28},{0,9,153},{132,7,83},{0,8,124},{0,8,60},{0,9,217}, + {130,7,23},{0,8,108},{0,8,44},{0,9,185},{0,8,12},{0,8,140}, + {0,8,76},{0,9,249},{128,7,3},{0,8,82},{0,8,18},{133,8,163}, + {131,7,35},{0,8,114},{0,8,50},{0,9,197},{129,7,11},{0,8,98}, + {0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {128,7,7},{0,8,90},{0,8,26},{0,9,149},{132,7,67},{0,8,122}, + 
{0,8,58},{0,9,213},{130,7,19},{0,8,106},{0,8,42},{0,9,181}, + {0,8,10},{0,8,138},{0,8,74},{0,9,245},{128,7,5},{0,8,86}, + {0,8,22},{65,8,0},{131,7,51},{0,8,118},{0,8,54},{0,9,205}, + {129,7,15},{0,8,102},{0,8,38},{0,9,173},{0,8,6},{0,8,134}, + {0,8,70},{0,9,237},{128,7,9},{0,8,94},{0,8,30},{0,9,157}, + {132,7,99},{0,8,126},{0,8,62},{0,9,221},{130,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253}, + {96,7,0},{0,8,81},{0,8,17},{133,8,131},{130,7,31},{0,8,113}, + {0,8,49},{0,9,195},{128,7,10},{0,8,97},{0,8,33},{0,9,163}, + {0,8,1},{0,8,129},{0,8,65},{0,9,227},{128,7,6},{0,8,89}, + {0,8,25},{0,9,147},{131,7,59},{0,8,121},{0,8,57},{0,9,211}, + {129,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9},{0,8,137}, + {0,8,73},{0,9,243},{128,7,4},{0,8,85},{0,8,21},{144,8,3}, + {131,7,43},{0,8,117},{0,8,53},{0,9,203},{129,7,13},{0,8,101}, + {0,8,37},{0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235}, + {128,7,8},{0,8,93},{0,8,29},{0,9,155},{132,7,83},{0,8,125}, + {0,8,61},{0,9,219},{130,7,23},{0,8,109},{0,8,45},{0,9,187}, + {0,8,13},{0,8,141},{0,8,77},{0,9,251},{128,7,3},{0,8,83}, + {0,8,19},{133,8,195},{131,7,35},{0,8,115},{0,8,51},{0,9,199}, + {129,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{128,7,7},{0,8,91},{0,8,27},{0,9,151}, + {132,7,67},{0,8,123},{0,8,59},{0,9,215},{130,7,19},{0,8,107}, + {0,8,43},{0,9,183},{0,8,11},{0,8,139},{0,8,75},{0,9,247}, + {128,7,5},{0,8,87},{0,8,23},{77,8,0},{131,7,51},{0,8,119}, + {0,8,55},{0,9,207},{129,7,15},{0,8,103},{0,8,39},{0,9,175}, + {0,8,7},{0,8,135},{0,8,71},{0,9,239},{128,7,9},{0,8,95}, + {0,8,31},{0,9,159},{132,7,99},{0,8,127},{0,8,63},{0,9,223}, + {130,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143}, + {0,8,79},{0,9,255} + }; + + static const code distfix[32] = { + {128,5,1},{135,5,257},{131,5,17},{139,5,4097},{129,5,5}, + {137,5,1025},{133,5,65},{141,5,16385},{128,5,3},{136,5,513}, + {132,5,33},{140,5,8193},{130,5,9},{138,5,2049},{134,5,129}, + {142,5,32769},{128,5,2},{135,5,385},{131,5,25},{139,5,6145}, + {129,5,7},{137,5,1537},{133,5,97},{141,5,24577},{128,5,4}, + {136,5,769},{132,5,49},{140,5,12289},{130,5,13},{138,5,3073}, + {134,5,193},{142,5,49153} + }; diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inflate9.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inflate9.h new file mode 100644 index 0000000..ee9a793 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inflate9.h @@ -0,0 +1,47 @@ +/* inflate9.h -- internal inflate state definition + * Copyright (C) 1995-2003 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
+ */ + +/* Possible inflate modes between inflate() calls */ +typedef enum { + TYPE, /* i: waiting for type bits, including last-flag bit */ + STORED, /* i: waiting for stored size (length and complement) */ + TABLE, /* i: waiting for dynamic block table lengths */ + LEN, /* i: waiting for length/lit code */ + DONE, /* finished check, done -- remain here until reset */ + BAD /* got a data error -- remain here until reset */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to the BAD mode -- not shown for clarity) + + Read deflate blocks: + TYPE -> STORED or TABLE or LEN or DONE + STORED -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN + Read deflate codes: + LEN -> LEN or TYPE + */ + +/* state maintained between inflate() calls. Approximately 7K bytes. */ +struct inflate_state { + /* sliding window */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ +}; diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.c new file mode 100644 index 0000000..ac707ed --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.c @@ -0,0 +1,319 @@ +/* inftree9.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2024 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftree9.h" + +#define MAXBITS 15 + +const char inflate9_copyright[] = + " inflate9 1.3.1 Copyright 1995-2024 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. 
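+
+   A typical call, patterned on the TABLE-state code in infback9.c (a
+   sketch added for this text; state stands for a struct inflate_state):
+
+     code FAR *next = state->codes;     space for the tables being built
+     unsigned lenbits = 9;              requested root table index bits
+     int ret = inflate_table9(LENS, state->lens, state->nlen,
+                              &next, &lenbits, state->work);
+     if (ret != 0)
+         ... lens[] did not describe a valid code ...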
+ */ +int inflate_table9(codetype type, unsigned short FAR *lens, unsigned codes, + code FAR * FAR *table, unsigned FAR *bits, + unsigned short FAR *work) { + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code this; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + int end; /* use base and extra for symbol > end */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, + 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, + 131, 163, 195, 227, 3, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 128, 128, 128, 128, 128, 128, 128, 128, 129, 129, 129, 129, + 130, 130, 130, 130, 131, 131, 131, 131, 132, 132, 132, 132, + 133, 133, 133, 133, 144, 203, 77}; + static const unsigned short dbase[32] = { /* Distance codes 0..31 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, + 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, + 4097, 6145, 8193, 12289, 16385, 24577, 32769, 49153}; + static const unsigned short dext[32] = { /* Distance codes 0..31 extra */ + 128, 128, 128, 128, 129, 129, 130, 130, 131, 131, 132, 132, + 133, 133, 134, 134, 135, 135, 136, 136, 137, 137, 138, 138, + 139, 139, 140, 140, 141, 141, 142, 142}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. + + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. 
finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) return -1; /* no codes! */ + for (min = 1; min <= MAXBITS; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftree9.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. 
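+
+   As a small worked example (added for this text): for four symbols
+   A,B,C,D with code lengths 2,1,3,3, sorting by length gives B,A,C,D and
+   the canonical codes B=0, A=10, C=110, D=111, with count[1..3] being
+   {1,1,2}; the deflate format then stores each of those codes with its
+   bits reversed.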
+ */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + end = 19; + break; + case LENS: + base = lbase; + base -= 257; + extra = lext; + extra -= 257; + end = 256; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + end = -1; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used >= ENOUGH_LENS) || + (type == DISTS && used >= ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + this.bits = (unsigned char)(len - drop); + if ((int)(work[sym]) < end) { + this.op = (unsigned char)0; + this.val = work[sym]; + } + else if ((int)(work[sym]) > end) { + this.op = (unsigned char)(extra[work[sym]]); + this.val = base[work[sym]]; + } + else { + this.op = (unsigned char)(32 + 64); /* end of block */ + this.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + do { + fill -= incr; + next[(huff >> drop) + fill] = this; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += 1U << curr; + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used >= ENOUGH_LENS) || + (type == DISTS && used >= ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* + Fill in rest of table for incomplete codes. This loop is similar to the + loop above in incrementing huff for table indices. It is assumed that + len is equal to curr + drop, so there is no loop needed to increment + through high index bits. When the current sub-table is filled, the loop + drops back to the root table to fill in any remaining entries there. 
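+
+   (Note added for this text: the marker written below has op == 64, the
+   "invalid code" op value listed in inftree9.h; the decode loops in
+   infback9.c test here.op & 64 and report a data error when one is hit.)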
+ */ + this.op = (unsigned char)64; /* invalid code marker */ + this.bits = (unsigned char)(len - drop); + this.val = (unsigned short)0; + while (huff != 0) { + /* when done with sub-table, drop back to root table */ + if (drop != 0 && (huff & mask) != low) { + drop = 0; + len = root; + next = *table; + curr = root; + this.bits = (unsigned char)len; + } + + /* put invalid code marker in table */ + next[huff >> drop] = this; + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.h new file mode 100644 index 0000000..ab2ea28 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/infback9/inftree9.h @@ -0,0 +1,61 @@ +/* inftree9.h -- header to use inftree9.c + * Copyright (C) 1995-2008 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 100eeeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1446, which is the sum of 852 for literal/length codes and 594 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns 852, and "enough 32 6 15" for distance codes returns 594. The + initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in infback9.c. If the root table size is changed, + then these maximum sizes would be need to be recalculated and updated. 
*/
+#define ENOUGH_LENS 852
+#define ENOUGH_DISTS 594
+#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS)
+
+/* Type of code to build for inflate_table9() */
+typedef enum {
+    CODES,
+    LENS,
+    DISTS
+} codetype;
+
+extern int inflate_table9(codetype type, unsigned short FAR *lens,
+                          unsigned codes, code FAR * FAR *table,
+                          unsigned FAR *bits, unsigned short FAR *work);
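As a reading aid for the op values documented in inftree9.h above, here is a small, purely illustrative C++ sketch of how a decoder would classify one table entry. The struct mirrors the 4-byte `code` layout from the header; the name `describe` and the struct copy are made up for this example and are not part of zlib:

    #include <cstdio>

    // Illustrative copy of the 4-byte `code` entry from inftree9.h.
    struct code_entry { unsigned char op; unsigned char bits; unsigned short val; };

    // Classify an entry according to the documented op bit patterns.
    void describe(const code_entry &e) {
        if (e.op == 0)                 // 00000000: literal byte
            std::printf("literal %u, consuming %u bits\n", e.val, e.bits);
        else if (e.op == 64)           // 01000000: invalid code marker
            std::printf("invalid code\n");
        else if (e.op == 96)           // 01100000 (32 + 64): end of block
            std::printf("end of block\n");
        else if (e.op & 128)           // 100eeeee: length/distance base
            std::printf("base %u with %u extra bits\n", e.val, e.op & 0x1f);
        else                           // 0000tttt: link to a sub-table
            std::printf("sub-table at offset %u, %u index bits\n",
                        e.val, e.op & 0x0f);
    }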
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream/zfstream.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream/zfstream.h
new file mode 100644
index 0000000..ed79098
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream/zfstream.h
@@ -0,0 +1,128 @@
+
+#ifndef zfstream_h
+#define zfstream_h
+
+#include <fstream.h>
+#include "zlib.h"
+
+class gzfilebuf : public streambuf {
+
+public:
+
+  gzfilebuf( );
+  virtual ~gzfilebuf();
+
+  gzfilebuf *open( const char *name, int io_mode );
+  gzfilebuf *attach( int file_descriptor, int io_mode );
+  gzfilebuf *close();
+
+  int setcompressionlevel( int comp_level );
+  int setcompressionstrategy( int comp_strategy );
+
+  inline int is_open() const { return (file !=NULL); }
+
+  virtual streampos seekoff( streamoff, ios::seek_dir, int );
+
+  virtual int sync();
+
+protected:
+
+  virtual int underflow();
+  virtual int overflow( int = EOF );
+
+private:
+
+  gzFile file;
+  short mode;
+  short own_file_descriptor;
+
+  int flushbuf();
+  int fillbuf();
+
+};
+
+class gzfilestream_common : virtual public ios {
+
+  friend class gzifstream;
+  friend class gzofstream;
+  friend gzofstream &setcompressionlevel( gzofstream &, int );
+  friend gzofstream &setcompressionstrategy( gzofstream &, int );
+
+public:
+  virtual ~gzfilestream_common();
+
+  void attach( int fd, int io_mode );
+  void open( const char *name, int io_mode );
+  void close();
+
+protected:
+  gzfilestream_common();
+
+private:
+  gzfilebuf *rdbuf();
+
+  gzfilebuf buffer;
+
+};
+
+class gzifstream : public gzfilestream_common, public istream {
+
+public:
+
+  gzifstream();
+  gzifstream( const char *name, int io_mode = ios::in );
+  gzifstream( int fd, int io_mode = ios::in );
+
+  virtual ~gzifstream();
+
+};
+
+class gzofstream : public gzfilestream_common, public ostream {
+
+public:
+
+  gzofstream();
+  gzofstream( const char *name, int io_mode = ios::out );
+  gzofstream( int fd, int io_mode = ios::out );
+
+  virtual ~gzofstream();
+
+};
+
+template<class T> class gzomanip {
+  friend gzofstream &operator<<(gzofstream &, const gzomanip<T> &);
+public:
+  gzomanip(gzofstream &(*f)(gzofstream &, T), T v) : func(f), val(v) { }
+private:
+  gzofstream &(*func)(gzofstream &, T);
+  T val;
+};
+
+template<class T> gzofstream &operator<<(gzofstream &s, const gzomanip<T> &m)
+{
+  return (*m.func)(s, m.val);
+}
+
+inline gzofstream &setcompressionlevel( gzofstream &s, int l )
+{
+  (s.rdbuf())->setcompressionlevel(l);
+  return s;
+}
+
+inline gzofstream &setcompressionstrategy( gzofstream &s, int l )
+{
+  (s.rdbuf())->setcompressionstrategy(l);
+  return s;
+}
+
+inline gzomanip<int> setcompressionlevel(int l)
+{
+  return gzomanip<int>(&setcompressionlevel,l);
+}
+
+inline gzomanip<int> setcompressionstrategy(int l)
+{
+  return gzomanip<int>(&setcompressionstrategy,l);
+}
+
+#endif
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream2/zstream.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream2/zstream.h
new file mode 100644
index 0000000..43d2332
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream2/zstream.h
@@ -0,0 +1,307 @@
+/*
+ *
+ * Copyright (c) 1997
+ * Christian Michelsen Research AS
+ * Advanced Computing
+ * Fantoftvegen 38, 5036 BERGEN, Norway
+ * http://www.cmr.no
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation. Christian Michelsen Research AS makes no
+ * representations about the suitability of this software for any
+ * purpose. It is provided "as is" without express or implied warranty.
+ *
+ */
+
+#ifndef ZSTREAM__H
+#define ZSTREAM__H
+
+/*
+ * zstream.h - C++ interface to the 'zlib' general purpose compression library
+ * $Id: zstream.h 1.1 1997-06-25 12:00:56+02 tyge Exp tyge $
+ */
+
+#include <strstream.h>
+#include <string.h>
+#include <stdio.h>
+#include "zlib.h"
+
+#if defined(_WIN32)
+#   include <fcntl.h>
+#   include <io.h>
+#   define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#else
+#   define SET_BINARY_MODE(file)
+#endif
+
+class zstringlen {
+public:
+    zstringlen(class izstream&);
+    zstringlen(class ozstream&, const char*);
+    size_t value() const { return val.word; }
+private:
+    struct Val { unsigned char byte; size_t word; } val;
+};
+
+// ----------------------------- izstream -----------------------------
+
+class izstream
+{
+    public:
+        izstream() : m_fp(0) {}
+        izstream(FILE* fp) : m_fp(0) { open(fp); }
+        izstream(const char* name) : m_fp(0) { open(name); }
+        ~izstream() { close(); }
+
+        /* Opens a gzip (.gz) file for reading.
+         * open() can be used to read a file which is not in gzip format;
+         * in this case read() will directly read from the file without
+         * decompression. errno can be checked to distinguish two error
+         * cases (if errno is zero, the zlib error is Z_MEM_ERROR).
+         */
+        void open(const char* name) {
+            if (m_fp) close();
+            m_fp = ::gzopen(name, "rb");
+        }
+
+        void open(FILE* fp) {
+            SET_BINARY_MODE(fp);
+            if (m_fp) close();
+            m_fp = ::gzdopen(fileno(fp), "rb");
+        }
+
+        /* Flushes all pending input if necessary, closes the compressed file
+         * and deallocates all the (de)compression state. The return value is
+         * the zlib error number (see function error() below).
+         */
+        int close() {
+            int r = ::gzclose(m_fp);
+            m_fp = 0; return r;
+        }
+
+        /* Binary read the given number of bytes from the compressed file.
+         */
+        int read(void* buf, size_t len) {
+            return ::gzread(m_fp, buf, len);
+        }
+
+        /* Returns the error message for the last error which occurred on the
+         * given compressed file. errnum is set to zlib error number. If an
+         * error occurred in the file system and not in the compression library,
+         * errnum is set to Z_ERRNO and the application may consult errno
+         * to get the exact error code.
+         */
+        const char* error(int* errnum) {
+            return ::gzerror(m_fp, errnum);
+        }
+
+        gzFile fp() { return m_fp; }
+
+    private:
+        gzFile m_fp;
+};
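A minimal usage sketch for the izstream class just defined (file name and buffer size are illustrative; note this vintage contrib predates standard C++ iostreams, so treat the sketch as an outline rather than code for a modern compiler):

    #include <cstdio>

    int main() {
        izstream zin("data.gz");              // gzopen()s the file for reading
        char buf[256];
        int n = zin.read(buf, sizeof(buf));   // uncompressed bytes read, -1 on error
        if (n < 0) {
            int errnum;
            std::printf("error: %s\n", zin.error(&errnum));
            return 1;
        }
        std::printf("read %d byte(s)\n", n);
        return 0;                             // ~izstream() gzclose()s the file
    }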
+/*
+ * Binary read the given (array of) object(s) from the compressed file.
+ * If the input file was not in gzip format, read() copies the objects number
+ * of bytes into the buffer.
+ * returns the number of uncompressed bytes actually read
+ * (0 for end of file, -1 for error).
+ */
+template <class T, class Items>
+inline int read(izstream& zs, T* x, Items items) {
+    return ::gzread(zs.fp(), x, items*sizeof(T));
+}
+
+/*
+ * Binary input with the '>' operator.
+ */
+template <class T>
+inline izstream& operator>(izstream& zs, T& x) {
+    ::gzread(zs.fp(), &x, sizeof(T));
+    return zs;
+}
+
+
+inline zstringlen::zstringlen(izstream& zs) {
+    zs > val.byte;
+    if (val.byte == 255) zs > val.word;
+    else val.word = val.byte;
+}
+
+/*
+ * Read length of string + the string with the '>' operator.
+ */
+inline izstream& operator>(izstream& zs, char* x) {
+    zstringlen len(zs);
+    ::gzread(zs.fp(), x, len.value());
+    x[len.value()] = '\0';
+    return zs;
+}
+
+inline char* read_string(izstream& zs) {
+    zstringlen len(zs);
+    char* x = new char[len.value()+1];
+    ::gzread(zs.fp(), x, len.value());
+    x[len.value()] = '\0';
+    return x;
+}
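To make the framing concrete: zstringlen stores short lengths in a single byte and escapes lengths of 255 or more with a 255 marker followed by the raw length word. A hypothetical reader loop using the operators above (the record layout is an assumption, matching whatever the ozstream '<' operators below wrote):

    int main() {
        izstream zin("records.gz");
        int count = 0;
        zin > count;                         // raw binary read of an int

        for (int i = 0; i < count; ++i) {
            char* name = read_string(zin);   // length-prefixed string
            // ... use name ...
            delete[] name;                   // read_string allocates with new[]
        }
        return 0;
    }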
+// ----------------------------- ozstream -----------------------------
+
+class ozstream
+{
+    public:
+        ozstream() : m_fp(0), m_os(0) {
+        }
+        ozstream(FILE* fp, int level = Z_DEFAULT_COMPRESSION)
+            : m_fp(0), m_os(0) {
+            open(fp, level);
+        }
+        ozstream(const char* name, int level = Z_DEFAULT_COMPRESSION)
+            : m_fp(0), m_os(0) {
+            open(name, level);
+        }
+        ~ozstream() {
+            close();
+        }
+
+        /* Opens a gzip (.gz) file for writing.
+         * The compression level parameter should be in 0..9
+         * errno can be checked to distinguish two error cases
+         * (if errno is zero, the zlib error is Z_MEM_ERROR).
+         */
+        void open(const char* name, int level = Z_DEFAULT_COMPRESSION) {
+            char mode[4] = "wb\0";
+            if (level != Z_DEFAULT_COMPRESSION) mode[2] = '0'+level;
+            if (m_fp) close();
+            m_fp = ::gzopen(name, mode);
+        }
+
+        /* open from a FILE pointer.
+         */
+        void open(FILE* fp, int level = Z_DEFAULT_COMPRESSION) {
+            SET_BINARY_MODE(fp);
+            char mode[4] = "wb\0";
+            if (level != Z_DEFAULT_COMPRESSION) mode[2] = '0'+level;
+            if (m_fp) close();
+            m_fp = ::gzdopen(fileno(fp), mode);
+        }
+
+        /* Flushes all pending output if necessary, closes the compressed file
+         * and deallocates all the (de)compression state. The return value is
+         * the zlib error number (see function error() below).
+         */
+        int close() {
+            if (m_os) {
+                ::gzwrite(m_fp, m_os->str(), m_os->pcount());
+                delete[] m_os->str(); delete m_os; m_os = 0;
+            }
+            int r = ::gzclose(m_fp); m_fp = 0; return r;
+        }
+
+        /* Binary write the given number of bytes into the compressed file.
+         */
+        int write(const void* buf, size_t len) {
+            return ::gzwrite(m_fp, (voidp) buf, len);
+        }
+
+        /* Flushes all pending output into the compressed file. The parameter
+         * _flush is as in the deflate() function. The return value is the zlib
+         * error number (see function gzerror below). flush() returns Z_OK if
+         * the flush_ parameter is Z_FINISH and all output could be flushed.
+         * flush() should be called only when strictly necessary because it can
+         * degrade compression.
+         */
+        int flush(int _flush) {
+            os_flush();
+            return ::gzflush(m_fp, _flush);
+        }
+
+        /* Returns the error message for the last error which occurred on the
+         * given compressed file. errnum is set to zlib error number. If an
+         * error occurred in the file system and not in the compression library,
+         * errnum is set to Z_ERRNO and the application may consult errno
+         * to get the exact error code.
+         */
+        const char* error(int* errnum) {
+            return ::gzerror(m_fp, errnum);
+        }
+
+        gzFile fp() { return m_fp; }
+
+        ostream& os() {
+            if (m_os == 0) m_os = new ostrstream;
+            return *m_os;
+        }
+
+        void os_flush() {
+            if (m_os && m_os->pcount()>0) {
+                ostrstream* oss = new ostrstream;
+                oss->fill(m_os->fill());
+                oss->flags(m_os->flags());
+                oss->precision(m_os->precision());
+                oss->width(m_os->width());
+                ::gzwrite(m_fp, m_os->str(), m_os->pcount());
+                delete[] m_os->str(); delete m_os; m_os = oss;
+            }
+        }
+
+    private:
+        gzFile m_fp;
+        ostrstream* m_os;
+};
+
+/*
+ * Binary write the given (array of) object(s) into the compressed file.
+ * returns the number of uncompressed bytes actually written
+ * (0 in case of error).
+ */
+template <class T, class Items>
+inline int write(ozstream& zs, const T* x, Items items) {
+    return ::gzwrite(zs.fp(), (voidp) x, items*sizeof(T));
+}
+
+/*
+ * Binary output with the '<' operator.
+ */
+template <class T>
+inline ozstream& operator<(ozstream& zs, const T& x) {
+    ::gzwrite(zs.fp(), (voidp) &x, sizeof(T));
+    return zs;
+}
+
+inline zstringlen::zstringlen(ozstream& zs, const char* x) {
+    val.byte = 255;  val.word = ::strlen(x);
+    if (val.word < 255) zs < (val.byte = val.word);
+    else zs < val;
+}
+
+/*
+ * Write length of string + the string with the '<' operator.
+ */
+inline ozstream& operator<(ozstream& zs, const char* x) {
+    zstringlen len(zs, x);
+    ::gzwrite(zs.fp(), (voidp) x, len.value());
+    return zs;
+}
+
+#ifdef _MSC_VER
+inline ozstream& operator<(ozstream& zs, char* const& x) {
+    return zs < (const char*) x;
+}
+#endif
+
+/*
+ * Ascii write with the << operator;
+ */
+template <class T>
+inline ostream& operator<<(ozstream& zs, const T& x) {
+    zs.os_flush();
+    return zs.os() << x;
+}
+
+#endif
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/test.cc b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/test.cc
new file mode 100644
index 0000000..9423533
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/test.cc
@@ -0,0 +1,50 @@
+/*
+ * Test program for gzifstream and gzofstream
+ *
+ * by Ludwig Schwardt
+ * original version by Kevin Ruland
+ */
+
+#include "zfstream.h"
+#include <iostream>      // for cout
+
+int main() {
+
+  gzofstream outf;
+  gzifstream inf;
+  char buf[80];
+
+  outf.open("test1.txt.gz");
+  outf << "The quick brown fox sidestepped the lazy canine\n"
+       << 1.3 << "\nPlan " << 9 << std::endl;
+  outf.close();
+  std::cout << "Wrote the following message to 'test1.txt.gz' (check with zcat or zless):\n"
+            << "The quick brown fox sidestepped the lazy canine\n"
+            << 1.3 << "\nPlan " << 9 << std::endl;
+
+  std::cout << "\nReading 'test1.txt.gz' (buffered) produces:\n";
+  inf.open("test1.txt.gz");
+  while (inf.getline(buf,80,'\n')) {
+    std::cout << buf << "\t(" << inf.rdbuf()->in_avail() << " chars left in buffer)\n";
+  }
+  inf.close();
+
+  outf.rdbuf()->pubsetbuf(0,0);
+  outf.open("test2.txt.gz");
+  outf << setcompression(Z_NO_COMPRESSION)
+       << "The quick brown fox sidestepped the lazy canine\n"
+       << 1.3 << "\nPlan " << 9 << std::endl;
+  outf.close();
+  std::cout << "\nWrote the same message to 'test2.txt.gz' in uncompressed form";
+
+  std::cout << "\nReading 'test2.txt.gz' (unbuffered) produces:\n";
+  inf.rdbuf()->pubsetbuf(0,0);
+  inf.open("test2.txt.gz");
+  while (inf.getline(buf,80,'\n')) {
+    std::cout << buf << "\t(" << inf.rdbuf()->in_avail() << " chars left in buffer)\n";
+  }
+  inf.close();
+
+  return 0;
+
+}
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.cc
b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.cc new file mode 100644 index 0000000..94eb933 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.cc @@ -0,0 +1,479 @@ +/* + * A C++ I/O streams interface to the zlib gz* functions + * + * by Ludwig Schwardt + * original version by Kevin Ruland + * + * This version is standard-compliant and compatible with gcc 3.x. + */ + +#include "zfstream.h" +#include // for strcpy, strcat, strlen (mode strings) +#include // for BUFSIZ + +// Internal buffer sizes (default and "unbuffered" versions) +#define BIGBUFSIZE BUFSIZ +#define SMALLBUFSIZE 1 + +/*****************************************************************************/ + +// Default constructor +gzfilebuf::gzfilebuf() +: file(NULL), io_mode(std::ios_base::openmode(0)), own_fd(false), + buffer(NULL), buffer_size(BIGBUFSIZE), own_buffer(true) +{ + // No buffers to start with + this->disable_buffer(); +} + +// Destructor +gzfilebuf::~gzfilebuf() +{ + // Sync output buffer and close only if responsible for file + // (i.e. attached streams should be left open at this stage) + this->sync(); + if (own_fd) + this->close(); + // Make sure internal buffer is deallocated + this->disable_buffer(); +} + +// Set compression level and strategy +int +gzfilebuf::setcompression(int comp_level, + int comp_strategy) +{ + return gzsetparams(file, comp_level, comp_strategy); +} + +// Open gzipped file +gzfilebuf* +gzfilebuf::open(const char *name, + std::ios_base::openmode mode) +{ + // Fail if file already open + if (this->is_open()) + return NULL; + // Don't support simultaneous read/write access (yet) + if ((mode & std::ios_base::in) && (mode & std::ios_base::out)) + return NULL; + + // Build mode string for gzopen and check it [27.8.1.3.2] + char char_mode[6] = "\0\0\0\0\0"; + if (!this->open_mode(mode, char_mode)) + return NULL; + + // Attempt to open file + if ((file = gzopen(name, char_mode)) == NULL) + return NULL; + + // On success, allocate internal buffer and set flags + this->enable_buffer(); + io_mode = mode; + own_fd = true; + return this; +} + +// Attach to gzipped file +gzfilebuf* +gzfilebuf::attach(int fd, + std::ios_base::openmode mode) +{ + // Fail if file already open + if (this->is_open()) + return NULL; + // Don't support simultaneous read/write access (yet) + if ((mode & std::ios_base::in) && (mode & std::ios_base::out)) + return NULL; + + // Build mode string for gzdopen and check it [27.8.1.3.2] + char char_mode[6] = "\0\0\0\0\0"; + if (!this->open_mode(mode, char_mode)) + return NULL; + + // Attempt to attach to file + if ((file = gzdopen(fd, char_mode)) == NULL) + return NULL; + + // On success, allocate internal buffer and set flags + this->enable_buffer(); + io_mode = mode; + own_fd = false; + return this; +} + +// Close gzipped file +gzfilebuf* +gzfilebuf::close() +{ + // Fail immediately if no file is open + if (!this->is_open()) + return NULL; + // Assume success + gzfilebuf* retval = this; + // Attempt to sync and close gzipped file + if (this->sync() == -1) + retval = NULL; + if (gzclose(file) < 0) + retval = NULL; + // File is now gone anyway (postcondition [27.8.1.3.8]) + file = NULL; + own_fd = false; + // Destroy internal buffer if it exists + this->disable_buffer(); + return retval; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +// Convert int open mode to mode string +bool +gzfilebuf::open_mode(std::ios_base::openmode mode, + char* c_mode) const +{ + bool testb = mode & 
std::ios_base::binary; + bool testi = mode & std::ios_base::in; + bool testo = mode & std::ios_base::out; + bool testt = mode & std::ios_base::trunc; + bool testa = mode & std::ios_base::app; + + // Check for valid flag combinations - see [27.8.1.3.2] (Table 92) + // Original zfstream hardcoded the compression level to maximum here... + // Double the time for less than 1% size improvement seems + // excessive though - keeping it at the default level + // To change back, just append "9" to the next three mode strings + if (!testi && testo && !testt && !testa) + strcpy(c_mode, "w"); + if (!testi && testo && !testt && testa) + strcpy(c_mode, "a"); + if (!testi && testo && testt && !testa) + strcpy(c_mode, "w"); + if (testi && !testo && !testt && !testa) + strcpy(c_mode, "r"); + // No read/write mode yet +// if (testi && testo && !testt && !testa) +// strcpy(c_mode, "r+"); +// if (testi && testo && testt && !testa) +// strcpy(c_mode, "w+"); + + // Mode string should be empty for invalid combination of flags + if (strlen(c_mode) == 0) + return false; + if (testb) + strcat(c_mode, "b"); + return true; +} + +// Determine number of characters in internal get buffer +std::streamsize +gzfilebuf::showmanyc() +{ + // Calls to underflow will fail if file not opened for reading + if (!this->is_open() || !(io_mode & std::ios_base::in)) + return -1; + // Make sure get area is in use + if (this->gptr() && (this->gptr() < this->egptr())) + return std::streamsize(this->egptr() - this->gptr()); + else + return 0; +} + +// Fill get area from gzipped file +gzfilebuf::int_type +gzfilebuf::underflow() +{ + // If something is left in the get area by chance, return it + // (this shouldn't normally happen, as underflow is only supposed + // to be called when gptr >= egptr, but it serves as error check) + if (this->gptr() && (this->gptr() < this->egptr())) + return traits_type::to_int_type(*(this->gptr())); + + // If the file hasn't been opened for reading, produce error + if (!this->is_open() || !(io_mode & std::ios_base::in)) + return traits_type::eof(); + + // Attempt to fill internal buffer from gzipped file + // (buffer must be guaranteed to exist...) 
+ int bytes_read = gzread(file, buffer, buffer_size); + // Indicates error or EOF + if (bytes_read <= 0) + { + // Reset get area + this->setg(buffer, buffer, buffer); + return traits_type::eof(); + } + // Make all bytes read from file available as get area + this->setg(buffer, buffer, buffer + bytes_read); + + // Return next character in get area + return traits_type::to_int_type(*(this->gptr())); +} + +// Write put area to gzipped file +gzfilebuf::int_type +gzfilebuf::overflow(int_type c) +{ + // Determine whether put area is in use + if (this->pbase()) + { + // Double-check pointer range + if (this->pptr() > this->epptr() || this->pptr() < this->pbase()) + return traits_type::eof(); + // Add extra character to buffer if not EOF + if (!traits_type::eq_int_type(c, traits_type::eof())) + { + *(this->pptr()) = traits_type::to_char_type(c); + this->pbump(1); + } + // Number of characters to write to file + int bytes_to_write = this->pptr() - this->pbase(); + // Overflow doesn't fail if nothing is to be written + if (bytes_to_write > 0) + { + // If the file hasn't been opened for writing, produce error + if (!this->is_open() || !(io_mode & std::ios_base::out)) + return traits_type::eof(); + // If gzipped file won't accept all bytes written to it, fail + if (gzwrite(file, this->pbase(), bytes_to_write) != bytes_to_write) + return traits_type::eof(); + // Reset next pointer to point to pbase on success + this->pbump(-bytes_to_write); + } + } + // Write extra character to file if not EOF + else if (!traits_type::eq_int_type(c, traits_type::eof())) + { + // If the file hasn't been opened for writing, produce error + if (!this->is_open() || !(io_mode & std::ios_base::out)) + return traits_type::eof(); + // Impromptu char buffer (allows "unbuffered" output) + char_type last_char = traits_type::to_char_type(c); + // If gzipped file won't accept this character, fail + if (gzwrite(file, &last_char, 1) != 1) + return traits_type::eof(); + } + + // If you got here, you have succeeded (even if c was EOF) + // The return value should therefore be non-EOF + if (traits_type::eq_int_type(c, traits_type::eof())) + return traits_type::not_eof(c); + else + return c; +} + +// Assign new buffer +std::streambuf* +gzfilebuf::setbuf(char_type* p, + std::streamsize n) +{ + // First make sure stuff is sync'ed, for safety + if (this->sync() == -1) + return NULL; + // If buffering is turned off on purpose via setbuf(0,0), still allocate one... + // "Unbuffered" only really refers to put [27.8.1.4.10], while get needs at + // least a buffer of size 1 (very inefficient though, therefore make it bigger?) + // This follows from [27.5.2.4.3]/12 (gptr needs to point at something, it seems) + if (!p || !n) + { + // Replace existing buffer (if any) with small internal buffer + this->disable_buffer(); + buffer = NULL; + buffer_size = 0; + own_buffer = true; + this->enable_buffer(); + } + else + { + // Replace existing buffer (if any) with external buffer + this->disable_buffer(); + buffer = p; + buffer_size = n; + own_buffer = false; + this->enable_buffer(); + } + return this; +} + +// Write put area to gzipped file (i.e. ensures that put area is empty) +int +gzfilebuf::sync() +{ + return traits_type::eq_int_type(this->overflow(), traits_type::eof()) ? -1 : 0; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +// Allocate internal buffer +void +gzfilebuf::enable_buffer() +{ + // If internal buffer required, allocate one + if (own_buffer && !buffer) + { + // Check for buffered vs. 
"unbuffered" + if (buffer_size > 0) + { + // Allocate internal buffer + buffer = new char_type[buffer_size]; + // Get area starts empty and will be expanded by underflow as need arises + this->setg(buffer, buffer, buffer); + // Setup entire internal buffer as put area. + // The one-past-end pointer actually points to the last element of the buffer, + // so that overflow(c) can safely add the extra character c to the sequence. + // These pointers remain in place for the duration of the buffer + this->setp(buffer, buffer + buffer_size - 1); + } + else + { + // Even in "unbuffered" case, (small?) get buffer is still required + buffer_size = SMALLBUFSIZE; + buffer = new char_type[buffer_size]; + this->setg(buffer, buffer, buffer); + // "Unbuffered" means no put buffer + this->setp(0, 0); + } + } + else + { + // If buffer already allocated, reset buffer pointers just to make sure no + // stale chars are lying around + this->setg(buffer, buffer, buffer); + this->setp(buffer, buffer + buffer_size - 1); + } +} + +// Destroy internal buffer +void +gzfilebuf::disable_buffer() +{ + // If internal buffer exists, deallocate it + if (own_buffer && buffer) + { + // Preserve unbuffered status by zeroing size + if (!this->pbase()) + buffer_size = 0; + delete[] buffer; + buffer = NULL; + this->setg(0, 0, 0); + this->setp(0, 0); + } + else + { + // Reset buffer pointers to initial state if external buffer exists + this->setg(buffer, buffer, buffer); + if (buffer) + this->setp(buffer, buffer + buffer_size - 1); + else + this->setp(0, 0); + } +} + +/*****************************************************************************/ + +// Default constructor initializes stream buffer +gzifstream::gzifstream() +: std::istream(NULL), sb() +{ this->init(&sb); } + +// Initialize stream buffer and open file +gzifstream::gzifstream(const char* name, + std::ios_base::openmode mode) +: std::istream(NULL), sb() +{ + this->init(&sb); + this->open(name, mode); +} + +// Initialize stream buffer and attach to file +gzifstream::gzifstream(int fd, + std::ios_base::openmode mode) +: std::istream(NULL), sb() +{ + this->init(&sb); + this->attach(fd, mode); +} + +// Open file and go into fail() state if unsuccessful +void +gzifstream::open(const char* name, + std::ios_base::openmode mode) +{ + if (!sb.open(name, mode | std::ios_base::in)) + this->setstate(std::ios_base::failbit); + else + this->clear(); +} + +// Attach to file and go into fail() state if unsuccessful +void +gzifstream::attach(int fd, + std::ios_base::openmode mode) +{ + if (!sb.attach(fd, mode | std::ios_base::in)) + this->setstate(std::ios_base::failbit); + else + this->clear(); +} + +// Close file +void +gzifstream::close() +{ + if (!sb.close()) + this->setstate(std::ios_base::failbit); +} + +/*****************************************************************************/ + +// Default constructor initializes stream buffer +gzofstream::gzofstream() +: std::ostream(NULL), sb() +{ this->init(&sb); } + +// Initialize stream buffer and open file +gzofstream::gzofstream(const char* name, + std::ios_base::openmode mode) +: std::ostream(NULL), sb() +{ + this->init(&sb); + this->open(name, mode); +} + +// Initialize stream buffer and attach to file +gzofstream::gzofstream(int fd, + std::ios_base::openmode mode) +: std::ostream(NULL), sb() +{ + this->init(&sb); + this->attach(fd, mode); +} + +// Open file and go into fail() state if unsuccessful +void +gzofstream::open(const char* name, + std::ios_base::openmode mode) +{ + if (!sb.open(name, mode | std::ios_base::out)) + 
this->setstate(std::ios_base::failbit);
+  else
+    this->clear();
+}
+
+// Attach to file and go into fail() state if unsuccessful
+void
+gzofstream::attach(int fd,
+                   std::ios_base::openmode mode)
+{
+  if (!sb.attach(fd, mode | std::ios_base::out))
+    this->setstate(std::ios_base::failbit);
+  else
+    this->clear();
+}
+
+// Close file
+void
+gzofstream::close()
+{
+  if (!sb.close())
+    this->setstate(std::ios_base::failbit);
+}
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.h
new file mode 100644
index 0000000..3dabc0f
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/iostream3/zfstream.h
@@ -0,0 +1,466 @@
+/*
+ * A C++ I/O streams interface to the zlib gz* functions
+ *
+ * by Ludwig Schwardt
+ * original version by Kevin Ruland
+ *
+ * This version is standard-compliant and compatible with gcc 3.x.
+ */
+
+#ifndef ZFSTREAM_H
+#define ZFSTREAM_H
+
+#include <istream>  // not iostream, since we don't need cin/cout
+#include <ostream>
+#include "zlib.h"
+
+/*****************************************************************************/
+
+/**
+ * @brief Gzipped file stream buffer class.
+ *
+ * This class implements basic_filebuf for gzipped files. It doesn't yet support
+ * seeking (allowed by zlib but slow/limited), putback and read/write access
+ * (tricky). Otherwise, it attempts to be a drop-in replacement for the standard
+ * file streambuf.
+*/
+class gzfilebuf : public std::streambuf
+{
+public:
+  // Default constructor.
+  gzfilebuf();
+
+  // Destructor.
+  virtual
+  ~gzfilebuf();
+
+  /**
+   * @brief Set compression level and strategy on the fly.
+   * @param comp_level Compression level (see zlib.h for allowed values)
+   * @param comp_strategy Compression strategy (see zlib.h for allowed values)
+   * @return Z_OK on success, Z_STREAM_ERROR otherwise.
+   *
+   * Unfortunately, these parameters cannot be modified separately, as the
+   * previous zfstream version assumed. Since the strategy is seldom changed,
+   * it can default and setcompression(level) then becomes like the old
+   * setcompressionlevel(level).
+   */
+  int
+  setcompression(int comp_level,
+                 int comp_strategy = Z_DEFAULT_STRATEGY);
+
+  /**
+   * @brief Check if file is open.
+   * @return True if file is open.
+   */
+  bool
+  is_open() const { return (file != NULL); }
+
+  /**
+   * @brief Open gzipped file.
+   * @param name File name.
+   * @param mode Open mode flags.
+   * @return @c this on success, NULL on failure.
+   */
+  gzfilebuf*
+  open(const char* name,
+       std::ios_base::openmode mode);
+
+  /**
+   * @brief Attach to already open gzipped file.
+   * @param fd File descriptor.
+   * @param mode Open mode flags.
+   * @return @c this on success, NULL on failure.
+   */
+  gzfilebuf*
+  attach(int fd,
+         std::ios_base::openmode mode);
+
+  /**
+   * @brief Close gzipped file.
+   * @return @c this on success, NULL on failure.
+   */
+  gzfilebuf*
+  close();
+
+protected:
+  /**
+   * @brief Convert ios open mode int to mode string used by zlib.
+   * @return True if valid mode flag combination.
+   */
+  bool
+  open_mode(std::ios_base::openmode mode,
+            char* c_mode) const;
+
+  /**
+   * @brief Number of characters available in stream buffer.
+   * @return Number of characters.
+   *
+   * This indicates number of characters in get area of stream buffer.
+   * These characters can be read without accessing the gzipped file.
+   */
+  virtual std::streamsize
+  showmanyc();
+
+  /**
+   * @brief Fill get area from gzipped file.
+   * @return First character in get area on success, EOF on error.
+ * + * This actually reads characters from gzipped file to stream + * buffer. Always buffered. + */ + virtual int_type + underflow(); + + /** + * @brief Write put area to gzipped file. + * @param c Extra character to add to buffer contents. + * @return Non-EOF on success, EOF on error. + * + * This actually writes characters in stream buffer to + * gzipped file. With unbuffered output this is done one + * character at a time. + */ + virtual int_type + overflow(int_type c = traits_type::eof()); + + /** + * @brief Installs external stream buffer. + * @param p Pointer to char buffer. + * @param n Size of external buffer. + * @return @c this on success, NULL on failure. + * + * Call setbuf(0,0) to enable unbuffered output. + */ + virtual std::streambuf* + setbuf(char_type* p, + std::streamsize n); + + /** + * @brief Flush stream buffer to file. + * @return 0 on success, -1 on error. + * + * This calls underflow(EOF) to do the job. + */ + virtual int + sync(); + +// +// Some future enhancements +// +// virtual int_type uflow(); +// virtual int_type pbackfail(int_type c = traits_type::eof()); +// virtual pos_type +// seekoff(off_type off, +// std::ios_base::seekdir way, +// std::ios_base::openmode mode = std::ios_base::in|std::ios_base::out); +// virtual pos_type +// seekpos(pos_type sp, +// std::ios_base::openmode mode = std::ios_base::in|std::ios_base::out); + +private: + /** + * @brief Allocate internal buffer. + * + * This function is safe to call multiple times. It will ensure + * that a proper internal buffer exists if it is required. If the + * buffer already exists or is external, the buffer pointers will be + * reset to their original state. + */ + void + enable_buffer(); + + /** + * @brief Destroy internal buffer. + * + * This function is safe to call multiple times. It will ensure + * that the internal buffer is deallocated if it exists. In any + * case, it will also reset the buffer pointers. + */ + void + disable_buffer(); + + /** + * Underlying file pointer. + */ + gzFile file; + + /** + * Mode in which file was opened. + */ + std::ios_base::openmode io_mode; + + /** + * @brief True if this object owns file descriptor. + * + * This makes the class responsible for closing the file + * upon destruction. + */ + bool own_fd; + + /** + * @brief Stream buffer. + * + * For simplicity this remains allocated on the free store for the + * entire life span of the gzfilebuf object, unless replaced by setbuf. + */ + char_type* buffer; + + /** + * @brief Stream buffer size. + * + * Defaults to system default buffer size (typically 8192 bytes). + * Modified by setbuf. + */ + std::streamsize buffer_size; + + /** + * @brief True if this object owns stream buffer. + * + * This makes the class responsible for deleting the buffer + * upon destruction. + */ + bool own_buffer; +}; + +/*****************************************************************************/ + +/** + * @brief Gzipped file input stream class. + * + * This class implements ifstream for gzipped files. Seeking and putback + * is not supported yet. +*/ +class gzifstream : public std::istream +{ +public: + // Default constructor + gzifstream(); + + /** + * @brief Construct stream on gzipped file to be opened. + * @param name File name. + * @param mode Open mode flags (forced to contain ios::in). + */ + explicit + gzifstream(const char* name, + std::ios_base::openmode mode = std::ios_base::in); + + /** + * @brief Construct stream on already open gzipped file. + * @param fd File descriptor. 
+   * @param mode Open mode flags (forced to contain ios::in).
+   */
+  explicit
+  gzifstream(int fd,
+             std::ios_base::openmode mode = std::ios_base::in);
+
+  /**
+   * Obtain underlying stream buffer.
+   */
+  gzfilebuf*
+  rdbuf() const
+  { return const_cast<gzfilebuf*>(&sb); }
+
+  /**
+   * @brief Check if file is open.
+   * @return True if file is open.
+   */
+  bool
+  is_open() { return sb.is_open(); }
+
+  /**
+   * @brief Open gzipped file.
+   * @param name File name.
+   * @param mode Open mode flags (forced to contain ios::in).
+   *
+   * Stream will be in state good() if file opens successfully;
+   * otherwise in state fail(). This differs from the behavior of
+   * ifstream, which never sets the state to good() and therefore
+   * won't allow you to reuse the stream for a second file unless
+   * you manually clear() the state. The choice is a matter of
+   * convenience.
+   */
+  void
+  open(const char* name,
+       std::ios_base::openmode mode = std::ios_base::in);
+
+  /**
+   * @brief Attach to already open gzipped file.
+   * @param fd File descriptor.
+   * @param mode Open mode flags (forced to contain ios::in).
+   *
+   * Stream will be in state good() if attach succeeded; otherwise
+   * in state fail().
+   */
+  void
+  attach(int fd,
+         std::ios_base::openmode mode = std::ios_base::in);
+
+  /**
+   * @brief Close gzipped file.
+   *
+   * Stream will be in state fail() if close failed.
+   */
+  void
+  close();
+
+private:
+  /**
+   * Underlying stream buffer.
+   */
+  gzfilebuf sb;
+};
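A usage sketch for gzifstream (the file name is illustrative; test.cc above exercises the same API):

    #include "zfstream.h"
    #include <iostream>
    #include <string>

    int main() {
        gzifstream in("log.txt.gz");     // mode is forced to contain ios::in
        if (!in.is_open()) {
            std::cerr << "could not open log.txt.gz\n";
            return 1;
        }
        std::string line;
        while (std::getline(in, line))   // decompression happens in underflow()
            std::cout << line << '\n';
        return 0;                        // ~gzfilebuf() syncs and closes the file
    }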
+
+/*****************************************************************************/
+
+/**
+ * @brief Gzipped file output stream class.
+ *
+ * This class implements ofstream for gzipped files. Seeking and putback
+ * is not supported yet.
+*/
+class gzofstream : public std::ostream
+{
+public:
+  // Default constructor
+  gzofstream();
+
+  /**
+   * @brief Construct stream on gzipped file to be opened.
+   * @param name File name.
+   * @param mode Open mode flags (forced to contain ios::out).
+   */
+  explicit
+  gzofstream(const char* name,
+             std::ios_base::openmode mode = std::ios_base::out);
+
+  /**
+   * @brief Construct stream on already open gzipped file.
+   * @param fd File descriptor.
+   * @param mode Open mode flags (forced to contain ios::out).
+   */
+  explicit
+  gzofstream(int fd,
+             std::ios_base::openmode mode = std::ios_base::out);
+
+  /**
+   * Obtain underlying stream buffer.
+   */
+  gzfilebuf*
+  rdbuf() const
+  { return const_cast<gzfilebuf*>(&sb); }
+
+  /**
+   * @brief Check if file is open.
+   * @return True if file is open.
+   */
+  bool
+  is_open() { return sb.is_open(); }
+
+  /**
+   * @brief Open gzipped file.
+   * @param name File name.
+   * @param mode Open mode flags (forced to contain ios::out).
+   *
+   * Stream will be in state good() if file opens successfully;
+   * otherwise in state fail(). This differs from the behavior of
+   * ofstream, which never sets the state to good() and therefore
+   * won't allow you to reuse the stream for a second file unless
+   * you manually clear() the state. The choice is a matter of
+   * convenience.
+   */
+  void
+  open(const char* name,
+       std::ios_base::openmode mode = std::ios_base::out);
+
+  /**
+   * @brief Attach to already open gzipped file.
+   * @param fd File descriptor.
+   * @param mode Open mode flags (forced to contain ios::out).
+   *
+   * Stream will be in state good() if attach succeeded; otherwise
+   * in state fail().
+   */
+  void
+  attach(int fd,
+         std::ios_base::openmode mode = std::ios_base::out);
+
+  /**
+   * @brief Close gzipped file.
+   *
+   * Stream will be in state fail() if close failed.
+   */
+  void
+  close();
+
+private:
+  /**
+   * Underlying stream buffer.
+   */
+  gzfilebuf sb;
+};
+
+/*****************************************************************************/
+
+/**
+ * @brief Gzipped file output stream manipulator class.
+ *
+ * This class defines a two-argument manipulator for gzofstream. It is used
+ * as base for the setcompression(int,int) manipulator.
+*/
+template<typename T1, typename T2>
+  class gzomanip2
+  {
+  public:
+    // Allows inserter to peek at internals
+    template<typename Ta, typename Tb>
+      friend gzofstream&
+      operator<<(gzofstream&,
+                 const gzomanip2<Ta,Tb>&);
+
+    // Constructor
+    gzomanip2(gzofstream& (*f)(gzofstream&, T1, T2),
+              T1 v1,
+              T2 v2);
+  private:
+    // Underlying manipulator function
+    gzofstream&
+    (*func)(gzofstream&, T1, T2);
+
+    // Arguments for manipulator function
+    T1 val1;
+    T2 val2;
+  };
+
+/*****************************************************************************/
+
+// Manipulator function thunks through to stream buffer
+inline gzofstream&
+setcompression(gzofstream &gzs, int l, int s = Z_DEFAULT_STRATEGY)
+{
+  (gzs.rdbuf())->setcompression(l, s);
+  return gzs;
+}
+
+// Manipulator constructor stores arguments
+template<typename T1, typename T2>
+  inline
+  gzomanip2<T1,T2>::gzomanip2(gzofstream &(*f)(gzofstream &, T1, T2),
+                              T1 v1,
+                              T2 v2)
+  : func(f), val1(v1), val2(v2)
+  { }
+
+// Inserter applies underlying manipulator function to stream
+template<typename T1, typename T2>
+  inline gzofstream&
+  operator<<(gzofstream& s, const gzomanip2<T1,T2>& m)
+  { return (*m.func)(s, m.val1, m.val2); }
+
+// Insert this onto stream to simplify setting of compression level
+inline gzomanip2<int,int>
+setcompression(int l, int s = Z_DEFAULT_STRATEGY)
+{ return gzomanip2<int,int>(&setcompression, l, s); }
+
+#endif // ZFSTREAM_H
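The test program earlier already drives this manipulator; as an isolated sketch (file name illustrative):

    #include "zfstream.h"

    int main() {
        gzofstream out("data.txt.gz");
        out << setcompression(Z_BEST_COMPRESSION)  // level 9, default strategy
            << "squeezed as hard as zlib allows\n";
        out << setcompression(Z_NO_COMPRESSION)    // stored, but still gzip-framed
            << "this part is written uncompressed\n";
        return 0;                                  // stream teardown closes the file
    }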
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/crypt.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/crypt.h
new file mode 100644
index 0000000..f4b93b7
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/crypt.h
@@ -0,0 +1,128 @@
+/* crypt.h -- base code for crypt/uncrypt ZIPfile
+
+   Version 1.01e, February 12th, 2005
+
+   Copyright (C) 1998-2005 Gilles Vollant
+
+   This code is a modified version of crypting code in Infozip distribution
+
+   The encryption/decryption parts of this source code (as opposed to the
+   non-echoing password parts) were originally written in Europe. The
+   whole source package can be freely distributed, including from the USA.
+   (Prior to January 2000, re-export from the US was a violation of US law.)
+
+   This encryption code is a direct transcription of the algorithm from
+   Roger Schlafly, described by Phil Katz in the file appnote.txt. This
+   file (appnote.txt) is distributed with the PKZIP program (even in the
+   version without encryption capabilities).
+
+   If you don't need crypting in your application, just define symbols
+   NOCRYPT and NOUNCRYPT.
+
+   This code support the "Traditional PKWARE Encryption".
+
+   The new AES encryption added on Zip format by Winzip (see the page
+   http://www.winzip.com/aes_info.htm ) and PKWare PKZip 5.x Strong
+   Encryption is not supported.
+*/
+
+#define CRC32(c, b) ((*(pcrc_32_tab+(((int)(c) ^ (b)) & 0xff))) ^ ((c) >> 8))
+
+/***********************************************************************
+ * Return the next byte in the pseudo-random sequence
+ */
+static int decrypt_byte(unsigned long* pkeys, const z_crc_t* pcrc_32_tab) {
+    unsigned temp;  /* POTENTIAL BUG:  temp*(temp^1) may overflow in an
+                     * unpredictable manner on 16-bit systems; not a problem
+                     * with any known compiler so far, though */
+
+    (void)pcrc_32_tab;
+    temp = ((unsigned)(*(pkeys+2)) & 0xffff) | 2;
+    return (int)(((temp * (temp ^ 1)) >> 8) & 0xff);
+}
+
+/***********************************************************************
+ * Update the encryption keys with the next byte of plain text
+ */
+static int update_keys(unsigned long* pkeys, const z_crc_t* pcrc_32_tab, int c) {
+    (*(pkeys+0)) = CRC32((*(pkeys+0)), c);
+    (*(pkeys+1)) += (*(pkeys+0)) & 0xff;
+    (*(pkeys+1)) = (*(pkeys+1)) * 134775813L + 1;
+    {
+      register int keyshift = (int)((*(pkeys+1)) >> 24);
+      (*(pkeys+2)) = CRC32((*(pkeys+2)), keyshift);
+    }
+    return c;
+}
+
+
+/***********************************************************************
+ * Initialize the encryption keys and the random header according to
+ * the given password.
+ */
+static void init_keys(const char* passwd, unsigned long* pkeys, const z_crc_t* pcrc_32_tab) {
+    *(pkeys+0) = 305419896L;
+    *(pkeys+1) = 591751049L;
+    *(pkeys+2) = 878082192L;
+    while (*passwd != '\0') {
+        update_keys(pkeys,pcrc_32_tab,(int)*passwd);
+        passwd++;
+    }
+}
+
+#define zdecode(pkeys,pcrc_32_tab,c) \
+    (update_keys(pkeys,pcrc_32_tab,c ^= decrypt_byte(pkeys,pcrc_32_tab)))
+
+#define zencode(pkeys,pcrc_32_tab,c,t) \
+    (t=decrypt_byte(pkeys,pcrc_32_tab), update_keys(pkeys,pcrc_32_tab,c), (Byte)t^(c))
+
+#ifdef INCLUDECRYPTINGCODE_IFCRYPTALLOWED
+
+#define RAND_HEAD_LEN  12
+   /* "last resort" source for second part of crypt seed pattern */
+#  ifndef ZCR_SEED2
+#    define ZCR_SEED2 3141592654UL     /* use PI as default pattern */
+#  endif
+
+static unsigned crypthead(const char* passwd,       /* password string */
+                          unsigned char* buf,       /* where to write header */
+                          int bufSize,
+                          unsigned long* pkeys,
+                          const z_crc_t* pcrc_32_tab,
+                          unsigned long crcForCrypting) {
+    unsigned n;                  /* index in random header */
+    int t;                       /* temporary */
+    int c;                       /* random byte */
+    unsigned char header[RAND_HEAD_LEN-2]; /* random header */
+    static unsigned calls = 0;   /* ensure different random header each time */
+
+    if (bufSize<RAND_HEAD_LEN)
+      return 0;
+
+    /* First generate RAND_HEAD_LEN-2 random bytes. We encrypt the
+     * output of rand() to get less predictability, since rand() is
+     * often poorly implemented.
+     */
+    if (++calls == 1)
+    {
+        srand((unsigned)(time(NULL) ^ ZCR_SEED2));
+    }
+    init_keys(passwd, pkeys, pcrc_32_tab);
+    for (n = 0; n < RAND_HEAD_LEN-2; n++)
+    {
+        c = (rand() >> 7) & 0xff;
+        header[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, c, t);
+    }
+    /* Encrypt random header (last two bytes is high word of crc) */
+    init_keys(passwd, pkeys, pcrc_32_tab);
+    for (n = 0; n < RAND_HEAD_LEN-2; n++)
+    {
+        buf[n] = (unsigned char)zencode(pkeys, pcrc_32_tab, header[n], t);
+    }
+    buf[n++] = (unsigned char)zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 16) & 0xff, t);
+    buf[n++] = (unsigned char)zencode(pkeys, pcrc_32_tab, (int)(crcForCrypting >> 24) & 0xff, t);
+    return n;
+}
+
+#endif
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.c
new file mode 100644
index 0000000..782d324
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.c
@@ -0,0 +1,231 @@
+/* ioapi.h -- IO base function header for compress/uncompress .zip
+   part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html )
+
+   Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html )
+
+   Modifications for Zip64
support + Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) + + For more info read MiniZip_info.txt + +*/ + +#if defined(_WIN32) && (!(defined(_CRT_SECURE_NO_WARNINGS))) + #define _CRT_SECURE_NO_WARNINGS +#endif + +#if defined(__APPLE__) || defined(IOAPI_NO_64) || defined(__HAIKU__) || defined(MINIZIP_FOPEN_NO_64) +// In darwin and perhaps other BSD variants off_t is a 64 bit value, hence no need for specific 64 bit functions +#define FOPEN_FUNC(filename, mode) fopen(filename, mode) +#define FTELLO_FUNC(stream) ftello(stream) +#define FSEEKO_FUNC(stream, offset, origin) fseeko(stream, offset, origin) +#else +#define FOPEN_FUNC(filename, mode) fopen64(filename, mode) +#define FTELLO_FUNC(stream) ftello64(stream) +#define FSEEKO_FUNC(stream, offset, origin) fseeko64(stream, offset, origin) +#endif + + +#include "ioapi.h" + +voidpf call_zopen64 (const zlib_filefunc64_32_def* pfilefunc, const void*filename, int mode) { + if (pfilefunc->zfile_func64.zopen64_file != NULL) + return (*(pfilefunc->zfile_func64.zopen64_file)) (pfilefunc->zfile_func64.opaque,filename,mode); + else + { + return (*(pfilefunc->zopen32_file))(pfilefunc->zfile_func64.opaque,(const char*)filename,mode); + } +} + +long call_zseek64 (const zlib_filefunc64_32_def* pfilefunc,voidpf filestream, ZPOS64_T offset, int origin) { + if (pfilefunc->zfile_func64.zseek64_file != NULL) + return (*(pfilefunc->zfile_func64.zseek64_file)) (pfilefunc->zfile_func64.opaque,filestream,offset,origin); + else + { + uLong offsetTruncated = (uLong)offset; + if (offsetTruncated != offset) + return -1; + else + return (*(pfilefunc->zseek32_file))(pfilefunc->zfile_func64.opaque,filestream,offsetTruncated,origin); + } +} + +ZPOS64_T call_ztell64 (const zlib_filefunc64_32_def* pfilefunc, voidpf filestream) { + if (pfilefunc->zfile_func64.zseek64_file != NULL) + return (*(pfilefunc->zfile_func64.ztell64_file)) (pfilefunc->zfile_func64.opaque,filestream); + else + { + uLong tell_uLong = (uLong)(*(pfilefunc->ztell32_file))(pfilefunc->zfile_func64.opaque,filestream); + if ((tell_uLong) == MAXU32) + return (ZPOS64_T)-1; + else + return tell_uLong; + } +} + +void fill_zlib_filefunc64_32_def_from_filefunc32(zlib_filefunc64_32_def* p_filefunc64_32, const zlib_filefunc_def* p_filefunc32) { + p_filefunc64_32->zfile_func64.zopen64_file = NULL; + p_filefunc64_32->zopen32_file = p_filefunc32->zopen_file; + p_filefunc64_32->zfile_func64.zread_file = p_filefunc32->zread_file; + p_filefunc64_32->zfile_func64.zwrite_file = p_filefunc32->zwrite_file; + p_filefunc64_32->zfile_func64.ztell64_file = NULL; + p_filefunc64_32->zfile_func64.zseek64_file = NULL; + p_filefunc64_32->zfile_func64.zclose_file = p_filefunc32->zclose_file; + p_filefunc64_32->zfile_func64.zerror_file = p_filefunc32->zerror_file; + p_filefunc64_32->zfile_func64.opaque = p_filefunc32->opaque; + p_filefunc64_32->zseek32_file = p_filefunc32->zseek_file; + p_filefunc64_32->ztell32_file = p_filefunc32->ztell_file; +} + + + +static voidpf ZCALLBACK fopen_file_func(voidpf opaque, const char* filename, int mode) { + FILE* file = NULL; + const char* mode_fopen = NULL; + (void)opaque; + if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) + mode_fopen = "rb"; + else + if (mode & ZLIB_FILEFUNC_MODE_EXISTING) + mode_fopen = "r+b"; + else + if (mode & ZLIB_FILEFUNC_MODE_CREATE) + mode_fopen = "wb"; + + if ((filename!=NULL) && (mode_fopen != NULL)) + file = fopen(filename, mode_fopen); + return file; +} + +static voidpf ZCALLBACK fopen64_file_func(voidpf opaque, const void* 
filename, int mode) { + FILE* file = NULL; + const char* mode_fopen = NULL; + (void)opaque; + if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) + mode_fopen = "rb"; + else + if (mode & ZLIB_FILEFUNC_MODE_EXISTING) + mode_fopen = "r+b"; + else + if (mode & ZLIB_FILEFUNC_MODE_CREATE) + mode_fopen = "wb"; + + if ((filename!=NULL) && (mode_fopen != NULL)) + file = FOPEN_FUNC((const char*)filename, mode_fopen); + return file; +} + + +static uLong ZCALLBACK fread_file_func(voidpf opaque, voidpf stream, void* buf, uLong size) { + uLong ret; + (void)opaque; + ret = (uLong)fread(buf, 1, (size_t)size, (FILE *)stream); + return ret; +} + +static uLong ZCALLBACK fwrite_file_func(voidpf opaque, voidpf stream, const void* buf, uLong size) { + uLong ret; + (void)opaque; + ret = (uLong)fwrite(buf, 1, (size_t)size, (FILE *)stream); + return ret; +} + +static long ZCALLBACK ftell_file_func(voidpf opaque, voidpf stream) { + long ret; + (void)opaque; + ret = ftell((FILE *)stream); + return ret; +} + + +static ZPOS64_T ZCALLBACK ftell64_file_func(voidpf opaque, voidpf stream) { + ZPOS64_T ret; + (void)opaque; + ret = (ZPOS64_T)FTELLO_FUNC((FILE *)stream); + return ret; +} + +static long ZCALLBACK fseek_file_func(voidpf opaque, voidpf stream, uLong offset, int origin) { + int fseek_origin=0; + long ret; + (void)opaque; + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + fseek_origin = SEEK_CUR; + break; + case ZLIB_FILEFUNC_SEEK_END : + fseek_origin = SEEK_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + fseek_origin = SEEK_SET; + break; + default: return -1; + } + ret = 0; + if (fseek((FILE *)stream, (long)offset, fseek_origin) != 0) + ret = -1; + return ret; +} + +static long ZCALLBACK fseek64_file_func(voidpf opaque, voidpf stream, ZPOS64_T offset, int origin) { + int fseek_origin=0; + long ret; + (void)opaque; + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + fseek_origin = SEEK_CUR; + break; + case ZLIB_FILEFUNC_SEEK_END : + fseek_origin = SEEK_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + fseek_origin = SEEK_SET; + break; + default: return -1; + } + ret = 0; + + if(FSEEKO_FUNC((FILE *)stream, (z_off64_t)offset, fseek_origin) != 0) + ret = -1; + + return ret; +} + + +static int ZCALLBACK fclose_file_func(voidpf opaque, voidpf stream) { + int ret; + (void)opaque; + ret = fclose((FILE *)stream); + return ret; +} + +static int ZCALLBACK ferror_file_func(voidpf opaque, voidpf stream) { + int ret; + (void)opaque; + ret = ferror((FILE *)stream); + return ret; +} + +void fill_fopen_filefunc(zlib_filefunc_def* pzlib_filefunc_def) { + pzlib_filefunc_def->zopen_file = fopen_file_func; + pzlib_filefunc_def->zread_file = fread_file_func; + pzlib_filefunc_def->zwrite_file = fwrite_file_func; + pzlib_filefunc_def->ztell_file = ftell_file_func; + pzlib_filefunc_def->zseek_file = fseek_file_func; + pzlib_filefunc_def->zclose_file = fclose_file_func; + pzlib_filefunc_def->zerror_file = ferror_file_func; + pzlib_filefunc_def->opaque = NULL; +} + +void fill_fopen64_filefunc(zlib_filefunc64_def* pzlib_filefunc_def) { + pzlib_filefunc_def->zopen64_file = fopen64_file_func; + pzlib_filefunc_def->zread_file = fread_file_func; + pzlib_filefunc_def->zwrite_file = fwrite_file_func; + pzlib_filefunc_def->ztell64_file = ftell64_file_func; + pzlib_filefunc_def->zseek64_file = fseek64_file_func; + pzlib_filefunc_def->zclose_file = fclose_file_func; + pzlib_filefunc_def->zerror_file = ferror_file_func; + pzlib_filefunc_def->opaque = NULL; +} diff --git 
a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.h new file mode 100644 index 0000000..a2d2e6e --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/ioapi.h @@ -0,0 +1,210 @@ +/* ioapi.h -- IO base function header for compress/uncompress .zip + part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) + + Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) + + Modifications for Zip64 support + Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) + + For more info read MiniZip_info.txt + + Changes + + Oct-2009 - Defined ZPOS64_T to fpos_t on windows and u_int64_t on linux. (might need to find a better why for this) + Oct-2009 - Change to fseeko64, ftello64 and fopen64 so large files would work on linux. + More if/def section may be needed to support other platforms + Oct-2009 - Defined fxxxx64 calls to normal fopen/ftell/fseek so they would compile on windows. + (but you should use iowin32.c for windows instead) + +*/ + +#ifndef _ZLIBIOAPI64_H +#define _ZLIBIOAPI64_H + +#if (!defined(_WIN32)) && (!defined(WIN32)) && (!defined(__APPLE__)) + + // Linux needs this to support file operation on files larger then 4+GB + // But might need better if/def to select just the platforms that needs them. + + #ifndef __USE_FILE_OFFSET64 + #define __USE_FILE_OFFSET64 + #endif + #ifndef __USE_LARGEFILE64 + #define __USE_LARGEFILE64 + #endif + #ifndef _LARGEFILE64_SOURCE + #define _LARGEFILE64_SOURCE + #endif + #ifndef _FILE_OFFSET_BIT + #define _FILE_OFFSET_BIT 64 + #endif + +#endif + +#include +#include +#include "zlib.h" + +#if defined(USE_FILE32API) +#define fopen64 fopen +#define ftello64 ftell +#define fseeko64 fseek +#else +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) || defined(__HAIKU__) || defined(MINIZIP_FOPEN_NO_64) +#define fopen64 fopen +#define ftello64 ftello +#define fseeko64 fseeko +#endif +#ifdef _MSC_VER + #define fopen64 fopen + #if (_MSC_VER >= 1400) && (!(defined(NO_MSCVER_FILE64_FUNC))) + #define ftello64 _ftelli64 + #define fseeko64 _fseeki64 + #else // old MSC + #define ftello64 ftell + #define fseeko64 fseek + #endif +#endif +#endif + +/* +#ifndef ZPOS64_T + #ifdef _WIN32 + #define ZPOS64_T fpos_t + #else + #include + #define ZPOS64_T uint64_t + #endif +#endif +*/ + +#ifdef HAVE_MINIZIP64_CONF_H +#include "mz64conf.h" +#endif + +/* a type chosen by DEFINE */ +#ifdef HAVE_64BIT_INT_CUSTOM +typedef 64BIT_INT_CUSTOM_TYPE ZPOS64_T; +#else +#ifdef HAS_STDINT_H +#include "stdint.h" +typedef uint64_t ZPOS64_T; +#else + + + +#if defined(_MSC_VER) || defined(__BORLANDC__) +typedef unsigned __int64 ZPOS64_T; +#else +typedef unsigned long long int ZPOS64_T; +#endif +#endif +#endif + +/* Maximum unsigned 32-bit value used as placeholder for zip64 */ +#ifndef MAXU32 +#define MAXU32 (0xffffffff) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + + +#define ZLIB_FILEFUNC_SEEK_CUR (1) +#define ZLIB_FILEFUNC_SEEK_END (2) +#define ZLIB_FILEFUNC_SEEK_SET (0) + +#define ZLIB_FILEFUNC_MODE_READ (1) +#define ZLIB_FILEFUNC_MODE_WRITE (2) +#define ZLIB_FILEFUNC_MODE_READWRITEFILTER (3) + +#define ZLIB_FILEFUNC_MODE_EXISTING (4) +#define ZLIB_FILEFUNC_MODE_CREATE (8) + + +#ifndef ZCALLBACK + #if (defined(WIN32) || defined(_WIN32) || defined (WINDOWS) || defined (_WINDOWS)) && defined(CALLBACK) && defined (USEWINDOWS_CALLBACK) + #define ZCALLBACK CALLBACK + #else + #define ZCALLBACK + #endif +#endif + + + + 
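The typedefs and structures just below are the heart of minizip's pluggable I/O layer: zip.c and unzip.c never touch stdio directly, only these callback tables. As a hedged sketch of how the stdio-backed table from ioapi.c above can be exercised by hand (the archive name is made up; real callers pass the table to unzOpen2_64 rather than invoking members directly):

    #include <cstdio>
    #include "ioapi.h"

    int main() {
        zlib_filefunc64_def ffunc;
        fill_fopen64_filefunc(&ffunc);   // install fopen64/fread/fwrite/... callbacks

        voidpf stream = ffunc.zopen64_file(ffunc.opaque, "archive.zip",
                                           ZLIB_FILEFUNC_MODE_READ);
        if (stream == NULL)
            return 1;

        unsigned char sig[4];
        if (ffunc.zread_file(ffunc.opaque, stream, sig, 4) == 4)
            std::printf("signature: %02x %02x %02x %02x\n",
                        sig[0], sig[1], sig[2], sig[3]);

        ffunc.zclose_file(ffunc.opaque, stream);
        return 0;
    }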
+typedef voidpf (ZCALLBACK *open_file_func) (voidpf opaque, const char* filename, int mode); +typedef uLong (ZCALLBACK *read_file_func) (voidpf opaque, voidpf stream, void* buf, uLong size); +typedef uLong (ZCALLBACK *write_file_func) (voidpf opaque, voidpf stream, const void* buf, uLong size); +typedef int (ZCALLBACK *close_file_func) (voidpf opaque, voidpf stream); +typedef int (ZCALLBACK *testerror_file_func) (voidpf opaque, voidpf stream); + +typedef long (ZCALLBACK *tell_file_func) (voidpf opaque, voidpf stream); +typedef long (ZCALLBACK *seek_file_func) (voidpf opaque, voidpf stream, uLong offset, int origin); + + +/* here is the "old" 32 bits structure */ +typedef struct zlib_filefunc_def_s +{ + open_file_func zopen_file; + read_file_func zread_file; + write_file_func zwrite_file; + tell_file_func ztell_file; + seek_file_func zseek_file; + close_file_func zclose_file; + testerror_file_func zerror_file; + voidpf opaque; +} zlib_filefunc_def; + +typedef ZPOS64_T (ZCALLBACK *tell64_file_func) (voidpf opaque, voidpf stream); +typedef long (ZCALLBACK *seek64_file_func) (voidpf opaque, voidpf stream, ZPOS64_T offset, int origin); +typedef voidpf (ZCALLBACK *open64_file_func) (voidpf opaque, const void* filename, int mode); + +typedef struct zlib_filefunc64_def_s +{ + open64_file_func zopen64_file; + read_file_func zread_file; + write_file_func zwrite_file; + tell64_file_func ztell64_file; + seek64_file_func zseek64_file; + close_file_func zclose_file; + testerror_file_func zerror_file; + voidpf opaque; +} zlib_filefunc64_def; + +void fill_fopen64_filefunc(zlib_filefunc64_def* pzlib_filefunc_def); +void fill_fopen_filefunc(zlib_filefunc_def* pzlib_filefunc_def); + +/* now internal definition, only for zip.c and unzip.h */ +typedef struct zlib_filefunc64_32_def_s +{ + zlib_filefunc64_def zfile_func64; + open_file_func zopen32_file; + tell_file_func ztell32_file; + seek_file_func zseek32_file; +} zlib_filefunc64_32_def; + + +#define ZREAD64(filefunc,filestream,buf,size) ((*((filefunc).zfile_func64.zread_file)) ((filefunc).zfile_func64.opaque,filestream,buf,size)) +#define ZWRITE64(filefunc,filestream,buf,size) ((*((filefunc).zfile_func64.zwrite_file)) ((filefunc).zfile_func64.opaque,filestream,buf,size)) +//#define ZTELL64(filefunc,filestream) ((*((filefunc).ztell64_file)) ((filefunc).opaque,filestream)) +//#define ZSEEK64(filefunc,filestream,pos,mode) ((*((filefunc).zseek64_file)) ((filefunc).opaque,filestream,pos,mode)) +#define ZCLOSE64(filefunc,filestream) ((*((filefunc).zfile_func64.zclose_file)) ((filefunc).zfile_func64.opaque,filestream)) +#define ZERROR64(filefunc,filestream) ((*((filefunc).zfile_func64.zerror_file)) ((filefunc).zfile_func64.opaque,filestream)) + +voidpf call_zopen64(const zlib_filefunc64_32_def* pfilefunc,const void*filename,int mode); +long call_zseek64(const zlib_filefunc64_32_def* pfilefunc,voidpf filestream, ZPOS64_T offset, int origin); +ZPOS64_T call_ztell64(const zlib_filefunc64_32_def* pfilefunc,voidpf filestream); + +void fill_zlib_filefunc64_32_def_from_filefunc32(zlib_filefunc64_32_def* p_filefunc64_32,const zlib_filefunc_def* p_filefunc32); + +#define ZOPEN64(filefunc,filename,mode) (call_zopen64((&(filefunc)),(filename),(mode))) +#define ZTELL64(filefunc,filestream) (call_ztell64((&(filefunc)),(filestream))) +#define ZSEEK64(filefunc,filestream,pos,mode) (call_zseek64((&(filefunc)),(filestream),(pos),(mode))) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.c 
b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.c new file mode 100644 index 0000000..08536e9 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.c @@ -0,0 +1,440 @@ +/* iowin32.c -- IO base function header for compress/uncompress .zip + Version 1.1, February 14h, 2010 + part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) + + Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) + + Modifications for Zip64 support + Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) + + For more info read MiniZip_info.txt + +*/ + +#include + +#include "zlib.h" +#include "ioapi.h" +#include "iowin32.h" + +#ifndef INVALID_HANDLE_VALUE +#define INVALID_HANDLE_VALUE (0xFFFFFFFF) +#endif + +#ifndef INVALID_SET_FILE_POINTER +#define INVALID_SET_FILE_POINTER ((DWORD)-1) +#endif + + +// see Include/shared/winapifamily.h in the Windows Kit +#if defined(WINAPI_FAMILY_PARTITION) && (!(defined(IOWIN32_USING_WINRT_API))) + +#if !defined(WINAPI_FAMILY_ONE_PARTITION) +#define WINAPI_FAMILY_ONE_PARTITION(PartitionSet, Partition) ((WINAPI_FAMILY & PartitionSet) == Partition) +#endif + +#if WINAPI_FAMILY_ONE_PARTITION(WINAPI_FAMILY, WINAPI_PARTITION_APP) +#define IOWIN32_USING_WINRT_API 1 +#endif +#endif + +typedef struct +{ + HANDLE hf; + int error; +} WIN32FILE_IOWIN; + + +static void win32_translate_open_mode(int mode, + DWORD* lpdwDesiredAccess, + DWORD* lpdwCreationDisposition, + DWORD* lpdwShareMode, + DWORD* lpdwFlagsAndAttributes) { + *lpdwDesiredAccess = *lpdwShareMode = *lpdwFlagsAndAttributes = *lpdwCreationDisposition = 0; + + if ((mode & ZLIB_FILEFUNC_MODE_READWRITEFILTER)==ZLIB_FILEFUNC_MODE_READ) + { + *lpdwDesiredAccess = GENERIC_READ; + *lpdwCreationDisposition = OPEN_EXISTING; + *lpdwShareMode = FILE_SHARE_READ; + } + else if (mode & ZLIB_FILEFUNC_MODE_EXISTING) + { + *lpdwDesiredAccess = GENERIC_WRITE | GENERIC_READ; + *lpdwCreationDisposition = OPEN_EXISTING; + } + else if (mode & ZLIB_FILEFUNC_MODE_CREATE) + { + *lpdwDesiredAccess = GENERIC_WRITE | GENERIC_READ; + *lpdwCreationDisposition = CREATE_ALWAYS; + } +} + +static voidpf win32_build_iowin(HANDLE hFile) { + voidpf ret=NULL; + + if ((hFile != NULL) && (hFile != INVALID_HANDLE_VALUE)) + { + WIN32FILE_IOWIN w32fiow; + w32fiow.hf = hFile; + w32fiow.error = 0; + ret = malloc(sizeof(WIN32FILE_IOWIN)); + + if (ret==NULL) + CloseHandle(hFile); + else + *((WIN32FILE_IOWIN*)ret) = w32fiow; + } + return ret; +} + +voidpf ZCALLBACK win32_open64_file_func(voidpf opaque, const void* filename, int mode) { + const char* mode_fopen = NULL; + DWORD dwDesiredAccess,dwCreationDisposition,dwShareMode,dwFlagsAndAttributes ; + HANDLE hFile = NULL; + + win32_translate_open_mode(mode,&dwDesiredAccess,&dwCreationDisposition,&dwShareMode,&dwFlagsAndAttributes); + +#ifdef IOWIN32_USING_WINRT_API +#ifdef UNICODE + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFile2((LPCTSTR)filename, dwDesiredAccess, dwShareMode, dwCreationDisposition, NULL); +#else + if ((filename!=NULL) && (dwDesiredAccess != 0)) + { + WCHAR filenameW[FILENAME_MAX + 0x200 + 1]; + MultiByteToWideChar(CP_ACP,0,(const char*)filename,-1,filenameW,FILENAME_MAX + 0x200); + hFile = CreateFile2(filenameW, dwDesiredAccess, dwShareMode, dwCreationDisposition, NULL); + } +#endif +#else + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFile((LPCTSTR)filename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwFlagsAndAttributes, NULL); +#endif + + 
return win32_build_iowin(hFile); +} + + +voidpf ZCALLBACK win32_open64_file_funcA(voidpf opaque, const void* filename, int mode) { + const char* mode_fopen = NULL; + DWORD dwDesiredAccess,dwCreationDisposition,dwShareMode,dwFlagsAndAttributes ; + HANDLE hFile = NULL; + + win32_translate_open_mode(mode,&dwDesiredAccess,&dwCreationDisposition,&dwShareMode,&dwFlagsAndAttributes); + +#ifdef IOWIN32_USING_WINRT_API + if ((filename!=NULL) && (dwDesiredAccess != 0)) + { + WCHAR filenameW[FILENAME_MAX + 0x200 + 1]; + MultiByteToWideChar(CP_ACP,0,(const char*)filename,-1,filenameW,FILENAME_MAX + 0x200); + hFile = CreateFile2(filenameW, dwDesiredAccess, dwShareMode, dwCreationDisposition, NULL); + } +#else + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFileA((LPCSTR)filename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwFlagsAndAttributes, NULL); +#endif + + return win32_build_iowin(hFile); +} + + +voidpf ZCALLBACK win32_open64_file_funcW(voidpf opaque, const void* filename, int mode) { + const char* mode_fopen = NULL; + DWORD dwDesiredAccess,dwCreationDisposition,dwShareMode,dwFlagsAndAttributes ; + HANDLE hFile = NULL; + + win32_translate_open_mode(mode,&dwDesiredAccess,&dwCreationDisposition,&dwShareMode,&dwFlagsAndAttributes); + +#ifdef IOWIN32_USING_WINRT_API + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFile2((LPCWSTR)filename, dwDesiredAccess, dwShareMode, dwCreationDisposition,NULL); +#else + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFileW((LPCWSTR)filename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwFlagsAndAttributes, NULL); +#endif + + return win32_build_iowin(hFile); +} + + +voidpf ZCALLBACK win32_open_file_func(voidpf opaque, const char* filename, int mode) { + const char* mode_fopen = NULL; + DWORD dwDesiredAccess,dwCreationDisposition,dwShareMode,dwFlagsAndAttributes ; + HANDLE hFile = NULL; + + win32_translate_open_mode(mode,&dwDesiredAccess,&dwCreationDisposition,&dwShareMode,&dwFlagsAndAttributes); + +#ifdef IOWIN32_USING_WINRT_API +#ifdef UNICODE + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFile2((LPCTSTR)filename, dwDesiredAccess, dwShareMode, dwCreationDisposition, NULL); +#else + if ((filename!=NULL) && (dwDesiredAccess != 0)) + { + WCHAR filenameW[FILENAME_MAX + 0x200 + 1]; + MultiByteToWideChar(CP_ACP,0,(const char*)filename,-1,filenameW,FILENAME_MAX + 0x200); + hFile = CreateFile2(filenameW, dwDesiredAccess, dwShareMode, dwCreationDisposition, NULL); + } +#endif +#else + if ((filename!=NULL) && (dwDesiredAccess != 0)) + hFile = CreateFile((LPCTSTR)filename, dwDesiredAccess, dwShareMode, NULL, dwCreationDisposition, dwFlagsAndAttributes, NULL); +#endif + + return win32_build_iowin(hFile); +} + + +uLong ZCALLBACK win32_read_file_func(voidpf opaque, voidpf stream, void* buf,uLong size) { + uLong ret=0; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + + if (hFile != NULL) + { + if (!ReadFile(hFile, buf, size, &ret, NULL)) + { + DWORD dwErr = GetLastError(); + if (dwErr == ERROR_HANDLE_EOF) + dwErr = 0; + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + } + } + + return ret; +} + + +uLong ZCALLBACK win32_write_file_func(voidpf opaque, voidpf stream, const void* buf, uLong size) { + uLong ret=0; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + + if (hFile != NULL) + { + if (!WriteFile(hFile, buf, size, &ret, NULL)) + { + DWORD dwErr = GetLastError(); + if (dwErr == ERROR_HANDLE_EOF) + 
dwErr = 0; + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + } + } + + return ret; +} + +static BOOL MySetFilePointerEx(HANDLE hFile, LARGE_INTEGER pos, LARGE_INTEGER *newPos, DWORD dwMoveMethod) { +#ifdef IOWIN32_USING_WINRT_API + return SetFilePointerEx(hFile, pos, newPos, dwMoveMethod); +#else + LONG lHigh = pos.HighPart; + DWORD dwNewPos = SetFilePointer(hFile, pos.LowPart, &lHigh, dwMoveMethod); + BOOL fOk = TRUE; + if (dwNewPos == 0xFFFFFFFF) + if (GetLastError() != NO_ERROR) + fOk = FALSE; + if ((newPos != NULL) && (fOk)) + { + newPos->LowPart = dwNewPos; + newPos->HighPart = lHigh; + } + return fOk; +#endif +} + +long ZCALLBACK win32_tell_file_func(voidpf opaque, voidpf stream) { + long ret=-1; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + { + LARGE_INTEGER pos; + pos.QuadPart = 0; + + if (!MySetFilePointerEx(hFile, pos, &pos, FILE_CURRENT)) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = -1; + } + else + ret=(long)pos.LowPart; + } + return ret; +} + +ZPOS64_T ZCALLBACK win32_tell64_file_func(voidpf opaque, voidpf stream) { + ZPOS64_T ret= (ZPOS64_T)-1; + HANDLE hFile = NULL; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream)->hf; + + if (hFile) + { + LARGE_INTEGER pos; + pos.QuadPart = 0; + + if (!MySetFilePointerEx(hFile, pos, &pos, FILE_CURRENT)) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = (ZPOS64_T)-1; + } + else + ret=pos.QuadPart; + } + return ret; +} + + +long ZCALLBACK win32_seek_file_func(voidpf opaque, voidpf stream, uLong offset, int origin) { + DWORD dwMoveMethod=0xFFFFFFFF; + HANDLE hFile = NULL; + + long ret=-1; + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + dwMoveMethod = FILE_CURRENT; + break; + case ZLIB_FILEFUNC_SEEK_END : + dwMoveMethod = FILE_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + dwMoveMethod = FILE_BEGIN; + break; + default: return -1; + } + + if (hFile != NULL) + { + LARGE_INTEGER pos; + pos.QuadPart = offset; + if (!MySetFilePointerEx(hFile, pos, NULL, dwMoveMethod)) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = -1; + } + else + ret=0; + } + return ret; +} + +long ZCALLBACK win32_seek64_file_func(voidpf opaque, voidpf stream, ZPOS64_T offset, int origin) { + DWORD dwMoveMethod=0xFFFFFFFF; + HANDLE hFile = NULL; + long ret=-1; + + if (stream!=NULL) + hFile = ((WIN32FILE_IOWIN*)stream)->hf; + + switch (origin) + { + case ZLIB_FILEFUNC_SEEK_CUR : + dwMoveMethod = FILE_CURRENT; + break; + case ZLIB_FILEFUNC_SEEK_END : + dwMoveMethod = FILE_END; + break; + case ZLIB_FILEFUNC_SEEK_SET : + dwMoveMethod = FILE_BEGIN; + break; + default: return -1; + } + + if (hFile) + { + LARGE_INTEGER pos; + pos.QuadPart = offset; + if (!MySetFilePointerEx(hFile, pos, NULL, dwMoveMethod)) + { + DWORD dwErr = GetLastError(); + ((WIN32FILE_IOWIN*)stream) -> error=(int)dwErr; + ret = -1; + } + else + ret=0; + } + return ret; +} + +int ZCALLBACK win32_close_file_func(voidpf opaque, voidpf stream) { + int ret=-1; + + if (stream!=NULL) + { + HANDLE hFile; + hFile = ((WIN32FILE_IOWIN*)stream) -> hf; + if (hFile != NULL) + { + CloseHandle(hFile); + ret=0; + } + free(stream); + } + return ret; +} + +int ZCALLBACK win32_error_file_func(voidpf opaque, voidpf stream) { + int ret=-1; + if (stream!=NULL) + { + ret = ((WIN32FILE_IOWIN*)stream) -> error; + } + return ret; +} + +void 
fill_win32_filefunc(zlib_filefunc_def* pzlib_filefunc_def) {
+    pzlib_filefunc_def->zopen_file = win32_open_file_func;
+    pzlib_filefunc_def->zread_file = win32_read_file_func;
+    pzlib_filefunc_def->zwrite_file = win32_write_file_func;
+    pzlib_filefunc_def->ztell_file = win32_tell_file_func;
+    pzlib_filefunc_def->zseek_file = win32_seek_file_func;
+    pzlib_filefunc_def->zclose_file = win32_close_file_func;
+    pzlib_filefunc_def->zerror_file = win32_error_file_func;
+    pzlib_filefunc_def->opaque = NULL;
+}
+
+void fill_win32_filefunc64(zlib_filefunc64_def* pzlib_filefunc_def) {
+    pzlib_filefunc_def->zopen64_file = win32_open64_file_func;
+    pzlib_filefunc_def->zread_file = win32_read_file_func;
+    pzlib_filefunc_def->zwrite_file = win32_write_file_func;
+    pzlib_filefunc_def->ztell64_file = win32_tell64_file_func;
+    pzlib_filefunc_def->zseek64_file = win32_seek64_file_func;
+    pzlib_filefunc_def->zclose_file = win32_close_file_func;
+    pzlib_filefunc_def->zerror_file = win32_error_file_func;
+    pzlib_filefunc_def->opaque = NULL;
+}
+
+
+void fill_win32_filefunc64A(zlib_filefunc64_def* pzlib_filefunc_def) {
+    pzlib_filefunc_def->zopen64_file = win32_open64_file_funcA;
+    pzlib_filefunc_def->zread_file = win32_read_file_func;
+    pzlib_filefunc_def->zwrite_file = win32_write_file_func;
+    pzlib_filefunc_def->ztell64_file = win32_tell64_file_func;
+    pzlib_filefunc_def->zseek64_file = win32_seek64_file_func;
+    pzlib_filefunc_def->zclose_file = win32_close_file_func;
+    pzlib_filefunc_def->zerror_file = win32_error_file_func;
+    pzlib_filefunc_def->opaque = NULL;
+}
+
+
+void fill_win32_filefunc64W(zlib_filefunc64_def* pzlib_filefunc_def) {
+    pzlib_filefunc_def->zopen64_file = win32_open64_file_funcW;
+    pzlib_filefunc_def->zread_file = win32_read_file_func;
+    pzlib_filefunc_def->zwrite_file = win32_write_file_func;
+    pzlib_filefunc_def->ztell64_file = win32_tell64_file_func;
+    pzlib_filefunc_def->zseek64_file = win32_seek64_file_func;
+    pzlib_filefunc_def->zclose_file = win32_close_file_func;
+    pzlib_filefunc_def->zerror_file = win32_error_file_func;
+    pzlib_filefunc_def->opaque = NULL;
+}
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.h
new file mode 100644
index 0000000..a23a65d
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/iowin32.h
@@ -0,0 +1,28 @@
+/* iowin32.h -- IO base function header for compress/uncompress .zip
+   Version 1.1, February 14th, 2010
+   part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html )
+
+   Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html )
+
+   Modifications for Zip64 support
+   Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com )
+
+   For more info read MiniZip_info.txt
+
+*/
+
+#include "ioapi.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void fill_win32_filefunc(zlib_filefunc_def* pzlib_filefunc_def);
+void fill_win32_filefunc64(zlib_filefunc64_def* pzlib_filefunc_def);
+void fill_win32_filefunc64A(zlib_filefunc64_def* pzlib_filefunc_def);
+void fill_win32_filefunc64W(zlib_filefunc64_def* pzlib_filefunc_def);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/miniunz.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/miniunz.c
new file mode 100644
index 0000000..d627c42
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/miniunz.c
@@ -0,0 +1,647 @@
+/*
+   miniunz.c
+   Version 1.1, February 14th, 2010
+   sample part of the
MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html )
+
+   Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html )
+
+   Modifications of Unzip for Zip64
+   Copyright (C) 2007-2008 Even Rouault
+
+   Modifications for Zip64 support on both zip and unzip
+   Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com )
+*/
+
+#if (!defined(_WIN32)) && (!defined(WIN32)) && (!defined(__APPLE__))
+        #ifndef __USE_FILE_OFFSET64
+                #define __USE_FILE_OFFSET64
+        #endif
+        #ifndef __USE_LARGEFILE64
+                #define __USE_LARGEFILE64
+        #endif
+        #ifndef _LARGEFILE64_SOURCE
+                #define _LARGEFILE64_SOURCE
+        #endif
+        #ifndef _FILE_OFFSET_BIT
+                #define _FILE_OFFSET_BIT 64
+        #endif
+#endif
+
+#if defined(__APPLE__) || defined(__HAIKU__) || defined(MINIZIP_FOPEN_NO_64)
+// In darwin and perhaps other BSD variants off_t is a 64 bit value, hence no need for specific 64 bit functions
+#define FOPEN_FUNC(filename, mode) fopen(filename, mode)
+#define FTELLO_FUNC(stream) ftello(stream)
+#define FSEEKO_FUNC(stream, offset, origin) fseeko(stream, offset, origin)
+#else
+#define FOPEN_FUNC(filename, mode) fopen64(filename, mode)
+#define FTELLO_FUNC(stream) ftello64(stream)
+#define FSEEKO_FUNC(stream, offset, origin) fseeko64(stream, offset, origin)
+#endif
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+#ifdef _WIN32
+# include <direct.h>
+# include <io.h>
+#else
+# include <unistd.h>
+# include <utime.h>
+#endif
+
+
+#include "unzip.h"
+
+#define CASESENSITIVITY (0)
+#define WRITEBUFFERSIZE (8192)
+#define MAXFILENAME (256)
+
+#ifdef _WIN32
+#define USEWIN32IOAPI
+#include "iowin32.h"
+#endif
+/*
+  mini unzip, demo of unzip package
+
+  usage :
+  Usage : miniunz [-exvlo] file.zip [file_to_extract] [-d extractdir]
+
+  list the file in the zipfile, and print the content of FILE_ID.ZIP or README.TXT
+    if it exists
+*/
+
+
+/* change_file_date : change the date/time of a file
+    filename : the filename of the file where date/time must be modified
+    dosdate : the new date at the MSDOS format (4 bytes)
+    tmu_date : the SAME new date at the tm_unz format */
+static void change_file_date(const char *filename, uLong dosdate, tm_unz tmu_date) {
+#ifdef _WIN32
+  HANDLE hFile;
+  FILETIME ftm,ftLocal,ftCreate,ftLastAcc,ftLastWrite;
+
+  hFile = CreateFileA(filename,GENERIC_READ | GENERIC_WRITE,
+                      0,NULL,OPEN_EXISTING,0,NULL);
+  GetFileTime(hFile,&ftCreate,&ftLastAcc,&ftLastWrite);
+  DosDateTimeToFileTime((WORD)(dosdate>>16),(WORD)dosdate,&ftLocal);
+  LocalFileTimeToFileTime(&ftLocal,&ftm);
+  SetFileTime(hFile,&ftm,&ftLastAcc,&ftm);
+  CloseHandle(hFile);
+#else
+#if defined(unix) || defined(__APPLE__)
+  (void)dosdate;
+  struct utimbuf ut;
+  struct tm newdate;
+  newdate.tm_sec = tmu_date.tm_sec;
+  newdate.tm_min=tmu_date.tm_min;
+  newdate.tm_hour=tmu_date.tm_hour;
+  newdate.tm_mday=tmu_date.tm_mday;
+  newdate.tm_mon=tmu_date.tm_mon;
+  if (tmu_date.tm_year > 1900)
+      newdate.tm_year=tmu_date.tm_year - 1900;
+  else
+      newdate.tm_year=tmu_date.tm_year ;
+  newdate.tm_isdst=-1;
+
+  ut.actime=ut.modtime=mktime(&newdate);
+  utime(filename,&ut);
+#else
+  (void)filename;
+  (void)dosdate;
+  (void)tmu_date;
+#endif
+#endif
+}
+
+
+/* mymkdir and change_file_date are not 100 % portable
+   As I don't know well Unix, I wait feedback for the unix portion */
+
+static int mymkdir(const char* dirname) {
+    int ret=0;
+#ifdef _WIN32
+    ret = _mkdir(dirname);
+#elif unix
+    ret = mkdir (dirname,0775);
+#elif __APPLE__
+    ret = mkdir (dirname,0775);
+#else
+    (void)dirname;
+#endif
+    return ret;
+}
+
+static int makedir(const
char *newdir) { + char *buffer ; + char *p; + size_t len = strlen(newdir); + + if (len == 0) + return 0; + + buffer = (char*)malloc(len+1); + if (buffer==NULL) + { + printf("Error allocating memory\n"); + return UNZ_INTERNALERROR; + } + strcpy(buffer,newdir); + + if (buffer[len-1] == '/') { + buffer[len-1] = '\0'; + } + if (mymkdir(buffer) == 0) + { + free(buffer); + return 1; + } + + p = buffer+1; + while (1) + { + char hold; + + while(*p && *p != '\\' && *p != '/') + p++; + hold = *p; + *p = 0; + if ((mymkdir(buffer) == -1) && (errno == ENOENT)) + { + printf("couldn't create directory %s\n",buffer); + free(buffer); + return 0; + } + if (hold == 0) + break; + *p++ = hold; + } + free(buffer); + return 1; +} + +static void do_banner(void) { + printf("MiniUnz 1.1, demo of zLib + Unz package written by Gilles Vollant\n"); + printf("more info at http://www.winimage.com/zLibDll/unzip.html\n\n"); +} + +static void do_help(void) { + printf("Usage : miniunz [-e] [-x] [-v] [-l] [-o] [-p password] file.zip [file_to_extr.] [-d extractdir]\n\n" \ + " -e Extract without pathname (junk paths)\n" \ + " -x Extract with pathname\n" \ + " -v list files\n" \ + " -l list files\n" \ + " -d directory to extract into\n" \ + " -o overwrite files without prompting\n" \ + " -p extract encrypted file using password\n\n"); +} + +static void Display64BitsSize(ZPOS64_T n, int size_char) { + /* to avoid compatibility problem , we do here the conversion */ + char number[21]; + int offset=19; + int pos_string = 19; + number[20]=0; + for (;;) { + number[offset]=(char)((n%10)+'0'); + if (number[offset] != '0') + pos_string=offset; + n/=10; + if (offset==0) + break; + offset--; + } + { + int size_display_string = 19-pos_string; + while (size_char > size_display_string) + { + size_char--; + printf(" "); + } + } + + printf("%s",&number[pos_string]); +} + +static int do_list(unzFile uf) { + uLong i; + unz_global_info64 gi; + int err; + + err = unzGetGlobalInfo64(uf,&gi); + if (err!=UNZ_OK) + printf("error %d with zipfile in unzGetGlobalInfo \n",err); + printf(" Length Method Size Ratio Date Time CRC-32 Name\n"); + printf(" ------ ------ ---- ----- ---- ---- ------ ----\n"); + for (i=0;i0) + ratio = (uLong)((file_info.compressed_size*100)/file_info.uncompressed_size); + + /* display a '*' if the file is encrypted */ + if ((file_info.flag & 1) != 0) + charCrypt='*'; + + if (file_info.compression_method==0) + string_method="Stored"; + else + if (file_info.compression_method==Z_DEFLATED) + { + uInt iLevel=(uInt)((file_info.flag & 0x6)/2); + if (iLevel==0) + string_method="Defl:N"; + else if (iLevel==1) + string_method="Defl:X"; + else if ((iLevel==2) || (iLevel==3)) + string_method="Defl:F"; /* 2:fast , 3 : extra fast*/ + } + else + if (file_info.compression_method==Z_BZIP2ED) + { + string_method="BZip2 "; + } + else + string_method="Unkn. 
"; + + Display64BitsSize(file_info.uncompressed_size,7); + printf(" %6s%c",string_method,charCrypt); + Display64BitsSize(file_info.compressed_size,7); + printf(" %3lu%% %2.2lu-%2.2lu-%2.2lu %2.2lu:%2.2lu %8.8lx %s\n", + ratio, + (uLong)file_info.tmu_date.tm_mon + 1, + (uLong)file_info.tmu_date.tm_mday, + (uLong)file_info.tmu_date.tm_year % 100, + (uLong)file_info.tmu_date.tm_hour,(uLong)file_info.tmu_date.tm_min, + (uLong)file_info.crc,filename_inzip); + if ((i+1)='a') && (rep<='z')) + rep -= 0x20; + } + while ((rep!='Y') && (rep!='N') && (rep!='A')); + } + + if (rep == 'N') + skip = 1; + + if (rep == 'A') + *popt_overwrite=1; + } + + if ((skip==0) && (err==UNZ_OK)) + { + fout=FOPEN_FUNC(write_filename,"wb"); + /* some zipfile don't contain directory alone before file */ + if ((fout==NULL) && ((*popt_extract_without_path)==0) && + (filename_withoutpath!=(char*)filename_inzip)) + { + char c=*(filename_withoutpath-1); + *(filename_withoutpath-1)='\0'; + makedir(write_filename); + *(filename_withoutpath-1)=c; + fout=FOPEN_FUNC(write_filename,"wb"); + } + + if (fout==NULL) + { + printf("error opening %s\n",write_filename); + } + } + + if (fout!=NULL) + { + printf(" extracting: %s\n",write_filename); + + do + { + err = unzReadCurrentFile(uf,buf,size_buf); + if (err<0) + { + printf("error %d with zipfile in unzReadCurrentFile\n",err); + break; + } + if (err>0) + if (fwrite(buf,(unsigned)err,1,fout)!=1) + { + printf("error in writing extracted file\n"); + err=UNZ_ERRNO; + break; + } + } + while (err>0); + if (fout) + fclose(fout); + + if (err==0) + change_file_date(write_filename,file_info.dosDate, + file_info.tmu_date); + } + + if (err==UNZ_OK) + { + err = unzCloseCurrentFile (uf); + if (err!=UNZ_OK) + { + printf("error %d with zipfile in unzCloseCurrentFile\n",err); + } + } + else + unzCloseCurrentFile(uf); /* don't lose the error */ + } + + free(buf); + return err; +} + + +static int do_extract(unzFile uf, int opt_extract_without_path, int opt_overwrite, const char* password) { + uLong i; + unz_global_info64 gi; + int err; + + err = unzGetGlobalInfo64(uf,&gi); + if (err!=UNZ_OK) + printf("error %d with zipfile in unzGetGlobalInfo \n",err); + + for (i=0;i +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# include +# include +#else +# include +# include +# include +# include +#endif + +#include "zip.h" + +#ifdef _WIN32 + #define USEWIN32IOAPI + #include "iowin32.h" +#endif + + + +#define WRITEBUFFERSIZE (16384) +#define MAXFILENAME (256) + +#ifdef _WIN32 +/* f: name of file to get info on, tmzip: return value: access, + modification and creation times, dt: dostime */ +static int filetime(const char *f, tm_zip *tmzip, uLong *dt) { + int ret = 0; + { + FILETIME ftLocal; + HANDLE hFind; + WIN32_FIND_DATAA ff32; + + hFind = FindFirstFileA(f,&ff32); + if (hFind != INVALID_HANDLE_VALUE) + { + FileTimeToLocalFileTime(&(ff32.ftLastWriteTime),&ftLocal); + FileTimeToDosDateTime(&ftLocal,((LPWORD)dt)+1,((LPWORD)dt)+0); + FindClose(hFind); + ret = 1; + } + } + return ret; +} +#else +#if defined(unix) || defined(__APPLE__) +/* f: name of file to get info on, tmzip: return value: access, + modification and creation times, dt: dostime */ +static int filetime(const char *f, tm_zip *tmzip, uLong *dt) { + (void)dt; + int ret=0; + struct stat s; /* results of stat() */ + struct tm* filedate; + time_t tm_t=0; + + if (strcmp(f,"-")!=0) + { + char name[MAXFILENAME+1]; + size_t len = strlen(f); + if (len > MAXFILENAME) + len = MAXFILENAME; + + strncpy(name, f,MAXFILENAME-1); + /* strncpy doesn't 
append the trailing NULL, of the string is too long. */ + name[ MAXFILENAME ] = '\0'; + + if (name[len - 1] == '/') + name[len - 1] = '\0'; + /* not all systems allow stat'ing a file with / appended */ + if (stat(name,&s)==0) + { + tm_t = s.st_mtime; + ret = 1; + } + } + filedate = localtime(&tm_t); + + tmzip->tm_sec = filedate->tm_sec; + tmzip->tm_min = filedate->tm_min; + tmzip->tm_hour = filedate->tm_hour; + tmzip->tm_mday = filedate->tm_mday; + tmzip->tm_mon = filedate->tm_mon ; + tmzip->tm_year = filedate->tm_year; + + return ret; +} +#else +/* f: name of file to get info on, tmzip: return value: access, + modification and creation times, dt: dostime */ +static int filetime(const char *f, tm_zip *tmzip, uLong *dt) { + (void)f; + (void)tmzip; + (void)dt; + return 0; +} +#endif +#endif + + + + +static int check_exist_file(const char* filename) { + FILE* ftestexist; + int ret = 1; + ftestexist = FOPEN_FUNC(filename,"rb"); + if (ftestexist==NULL) + ret = 0; + else + fclose(ftestexist); + return ret; +} + +static void do_banner(void) { + printf("MiniZip 1.1, demo of zLib + MiniZip64 package, written by Gilles Vollant\n"); + printf("more info on MiniZip at http://www.winimage.com/zLibDll/minizip.html\n\n"); +} + +static void do_help(void) { + printf("Usage : minizip [-o] [-a] [-0 to -9] [-p password] [-j] file.zip [files_to_add]\n\n" \ + " -o Overwrite existing file.zip\n" \ + " -a Append to existing file.zip\n" \ + " -0 Store only\n" \ + " -1 Compress faster\n" \ + " -9 Compress better\n\n" \ + " -j exclude path. store only the file name.\n\n"); +} + +/* calculate the CRC32 of a file, + because to encrypt a file, we need known the CRC32 of the file before */ +static int getFileCrc(const char* filenameinzip, void* buf, unsigned long size_buf, unsigned long* result_crc) { + unsigned long calculate_crc=0; + int err=ZIP_OK; + FILE * fin = FOPEN_FUNC(filenameinzip,"rb"); + + unsigned long size_read = 0; + /* unsigned long total_read = 0; */ + if (fin==NULL) + { + err = ZIP_ERRNO; + } + + if (err == ZIP_OK) + do + { + err = ZIP_OK; + size_read = fread(buf,1,size_buf,fin); + if (size_read < size_buf) + if (feof(fin)==0) + { + printf("error in reading %s\n",filenameinzip); + err = ZIP_ERRNO; + } + + if (size_read>0) + calculate_crc = crc32_z(calculate_crc,buf,size_read); + /* total_read += size_read; */ + + } while ((err == ZIP_OK) && (size_read>0)); + + if (fin) + fclose(fin); + + *result_crc=calculate_crc; + printf("file %s crc %lx\n", filenameinzip, calculate_crc); + return err; +} + +static int isLargeFile(const char* filename) { + int largeFile = 0; + ZPOS64_T pos = 0; + FILE* pFile = FOPEN_FUNC(filename, "rb"); + + if(pFile != NULL) + { + FSEEKO_FUNC(pFile, 0, SEEK_END); + pos = (ZPOS64_T)FTELLO_FUNC(pFile); + + printf("File : %s is %llu bytes\n", filename, pos); + + if(pos >= 0xffffffff) + largeFile = 1; + + fclose(pFile); + } + + return largeFile; +} + +int main(int argc, char *argv[]) { + int i; + int opt_overwrite=0; + int opt_compress_level=Z_DEFAULT_COMPRESSION; + int opt_exclude_path=0; + int zipfilenamearg = 0; + char filename_try[MAXFILENAME+16]; + int zipok; + int err=0; + size_t size_buf=0; + void* buf=NULL; + const char* password=NULL; + + + do_banner(); + if (argc==1) + { + do_help(); + return 0; + } + else + { + for (i=1;i='0') && (c<='9')) + opt_compress_level = c-'0'; + if ((c=='j') || (c=='J')) + opt_exclude_path = 1; + + if (((c=='p') || (c=='P')) && (i+1='a') && (rep<='z')) + rep -= 0x20; + } + while ((rep!='Y') && (rep!='N') && (rep!='A')); + if (rep=='N') + zipok = 0; + 
if (rep=='A') + opt_overwrite = 2; + } + } + + if (zipok==1) + { + zipFile zf; + int errclose; +# ifdef USEWIN32IOAPI + zlib_filefunc64_def ffunc; + fill_win32_filefunc64A(&ffunc); + zf = zipOpen2_64(filename_try,(opt_overwrite==2) ? 2 : 0,NULL,&ffunc); +# else + zf = zipOpen64(filename_try,(opt_overwrite==2) ? 2 : 0); +# endif + + if (zf == NULL) + { + printf("error opening %s\n",filename_try); + err= ZIP_ERRNO; + } + else + printf("creating %s\n",filename_try); + + for (i=zipfilenamearg+1;(i='0') && (argv[i][1]<='9'))) && + (strlen(argv[i]) == 2))) + { + FILE * fin = NULL; + size_t size_read; + const char* filenameinzip = argv[i]; + const char *savefilenameinzip; + zip_fileinfo zi; + unsigned long crcFile=0; + int zip64 = 0; + + zi.tmz_date.tm_sec = zi.tmz_date.tm_min = zi.tmz_date.tm_hour = + zi.tmz_date.tm_mday = zi.tmz_date.tm_mon = zi.tmz_date.tm_year = 0; + zi.dosDate = 0; + zi.internal_fa = 0; + zi.external_fa = 0; + filetime(filenameinzip,&zi.tmz_date,&zi.dosDate); + +/* + err = zipOpenNewFileInZip(zf,filenameinzip,&zi, + NULL,0,NULL,0,NULL / * comment * /, + (opt_compress_level != 0) ? Z_DEFLATED : 0, + opt_compress_level); +*/ + if ((password != NULL) && (err==ZIP_OK)) + err = getFileCrc(filenameinzip,buf,size_buf,&crcFile); + + zip64 = isLargeFile(filenameinzip); + + /* The path name saved, should not include a leading slash. */ + /*if it did, windows/xp and dynazip couldn't read the zip file. */ + savefilenameinzip = filenameinzip; + while( savefilenameinzip[0] == '\\' || savefilenameinzip[0] == '/' ) + { + savefilenameinzip++; + } + + /*should the zip file contain any path at all?*/ + if( opt_exclude_path ) + { + const char *tmpptr; + const char *lastslash = 0; + for( tmpptr = savefilenameinzip; *tmpptr; tmpptr++) + { + if( *tmpptr == '\\' || *tmpptr == '/') + { + lastslash = tmpptr; + } + } + if( lastslash != NULL ) + { + savefilenameinzip = lastslash+1; // base filename follows last slash. + } + } + + /**/ + err = zipOpenNewFileInZip3_64(zf,savefilenameinzip,&zi, + NULL,0,NULL,0,NULL /* comment*/, + (opt_compress_level != 0) ? 
Z_DEFLATED : 0, + opt_compress_level,0, + /* -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, */ + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, + password,crcFile, zip64); + + if (err != ZIP_OK) + printf("error in opening %s in zipfile\n",filenameinzip); + else + { + fin = FOPEN_FUNC(filenameinzip,"rb"); + if (fin==NULL) + { + err=ZIP_ERRNO; + printf("error in opening %s for reading\n",filenameinzip); + } + } + + if (err == ZIP_OK) + do + { + err = ZIP_OK; + size_read = fread(buf,1,size_buf,fin); + if (size_read < size_buf) + if (feof(fin)==0) + { + printf("error in reading %s\n",filenameinzip); + err = ZIP_ERRNO; + } + + if (size_read>0) + { + err = zipWriteInFileInZip (zf,buf,(unsigned)size_read); + if (err<0) + { + printf("error in writing %s in the zipfile\n", + filenameinzip); + } + + } + } while ((err == ZIP_OK) && (size_read>0)); + + if (fin) + fclose(fin); + + if (err<0) + err=ZIP_ERRNO; + else + { + err = zipCloseFileInZip(zf); + if (err!=ZIP_OK) + printf("error in closing %s in the zipfile\n", + filenameinzip); + } + } + } + errclose = zipClose(zf,NULL); + if (errclose != ZIP_OK) + printf("error in closing %s\n",filename_try); + } + else + { + do_help(); + } + + free(buf); + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.c new file mode 100644 index 0000000..c8d2375 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.c @@ -0,0 +1,285 @@ +/* + Additional tools for Minizip + Code: Xavier Roche '2004 + License: Same as ZLIB (www.gzip.org) +*/ + +/* Code */ +#include +#include +#include +#include "zlib.h" +#include "unzip.h" + +#define READ_8(adr) ((unsigned char)*(adr)) +#define READ_16(adr) ( READ_8(adr) | (READ_8(adr+1) << 8) ) +#define READ_32(adr) ( READ_16(adr) | (READ_16((adr)+2) << 16) ) + +#define WRITE_8(buff, n) do { \ + *((unsigned char*)(buff)) = (unsigned char) ((n) & 0xff); \ +} while(0) +#define WRITE_16(buff, n) do { \ + WRITE_8((unsigned char*)(buff), n); \ + WRITE_8(((unsigned char*)(buff)) + 1, (n) >> 8); \ +} while(0) +#define WRITE_32(buff, n) do { \ + WRITE_16((unsigned char*)(buff), (n) & 0xffff); \ + WRITE_16((unsigned char*)(buff) + 2, (n) >> 16); \ +} while(0) + +extern int ZEXPORT unzRepair(const char* file, const char* fileOut, const char* fileOutTmp, uLong* nRecovered, uLong* bytesRecovered) { + int err = Z_OK; + FILE* fpZip = fopen(file, "rb"); + FILE* fpOut = fopen(fileOut, "wb"); + FILE* fpOutCD = fopen(fileOutTmp, "wb"); + if (fpZip != NULL && fpOut != NULL) { + int entries = 0; + uLong totalBytes = 0; + char header[30]; + char filename[1024]; + char extra[1024]; + int offset = 0; + int offsetCD = 0; + while ( fread(header, 1, 30, fpZip) == 30 ) { + int currentOffset = offset; + + /* File entry */ + if (READ_32(header) == 0x04034b50) { + unsigned int version = READ_16(header + 4); + unsigned int gpflag = READ_16(header + 6); + unsigned int method = READ_16(header + 8); + unsigned int filetime = READ_16(header + 10); + unsigned int filedate = READ_16(header + 12); + unsigned int crc = READ_32(header + 14); /* crc */ + unsigned int cpsize = READ_32(header + 18); /* compressed size */ + unsigned int uncpsize = READ_32(header + 22); /* uncompressed sz */ + unsigned int fnsize = READ_16(header + 26); /* file name length */ + unsigned int extsize = READ_16(header + 28); /* extra field length */ + filename[0] = extra[0] = '\0'; + + /* Header */ + if (fwrite(header, 1, 30, fpOut) == 30) { + offset += 30; + } else { + err = 
Z_ERRNO; + break; + } + + /* Filename */ + if (fnsize > 0) { + if (fnsize < sizeof(filename)) { + if (fread(filename, 1, fnsize, fpZip) == fnsize) { + if (fwrite(filename, 1, fnsize, fpOut) == fnsize) { + offset += fnsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_STREAM_ERROR; + break; + } + + /* Extra field */ + if (extsize > 0) { + if (extsize < sizeof(extra)) { + if (fread(extra, 1, extsize, fpZip) == extsize) { + if (fwrite(extra, 1, extsize, fpOut) == extsize) { + offset += extsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_ERRNO; + break; + } + } + + /* Data */ + { + int dataSize = cpsize; + if (dataSize == 0) { + dataSize = uncpsize; + } + if (dataSize > 0) { + char* data = malloc(dataSize); + if (data != NULL) { + if ((int)fread(data, 1, dataSize, fpZip) == dataSize) { + if ((int)fwrite(data, 1, dataSize, fpOut) == dataSize) { + offset += dataSize; + totalBytes += dataSize; + } else { + err = Z_ERRNO; + } + } else { + err = Z_ERRNO; + } + free(data); + if (err != Z_OK) { + break; + } + } else { + err = Z_MEM_ERROR; + break; + } + } + } + + /* Central directory entry */ + { + char header[46]; + char* comment = ""; + int comsize = (int) strlen(comment); + WRITE_32(header, 0x02014b50); + WRITE_16(header + 4, version); + WRITE_16(header + 6, version); + WRITE_16(header + 8, gpflag); + WRITE_16(header + 10, method); + WRITE_16(header + 12, filetime); + WRITE_16(header + 14, filedate); + WRITE_32(header + 16, crc); + WRITE_32(header + 20, cpsize); + WRITE_32(header + 24, uncpsize); + WRITE_16(header + 28, fnsize); + WRITE_16(header + 30, extsize); + WRITE_16(header + 32, comsize); + WRITE_16(header + 34, 0); /* disk # */ + WRITE_16(header + 36, 0); /* int attrb */ + WRITE_32(header + 38, 0); /* ext attrb */ + WRITE_32(header + 42, currentOffset); + /* Header */ + if (fwrite(header, 1, 46, fpOutCD) == 46) { + offsetCD += 46; + + /* Filename */ + if (fnsize > 0) { + if (fwrite(filename, 1, fnsize, fpOutCD) == fnsize) { + offsetCD += fnsize; + } else { + err = Z_ERRNO; + break; + } + } else { + err = Z_STREAM_ERROR; + break; + } + + /* Extra field */ + if (extsize > 0) { + if (fwrite(extra, 1, extsize, fpOutCD) == extsize) { + offsetCD += extsize; + } else { + err = Z_ERRNO; + break; + } + } + + /* Comment field */ + if (comsize > 0) { + if ((int)fwrite(comment, 1, comsize, fpOutCD) == comsize) { + offsetCD += comsize; + } else { + err = Z_ERRNO; + break; + } + } + + + } else { + err = Z_ERRNO; + break; + } + } + + /* Success */ + entries++; + + } else { + break; + } + } + + /* Final central directory */ + { + int entriesZip = entries; + char header[22]; + char* comment = ""; // "ZIP File recovered by zlib/minizip/mztools"; + int comsize = (int) strlen(comment); + if (entriesZip > 0xffff) { + entriesZip = 0xffff; + } + WRITE_32(header, 0x06054b50); + WRITE_16(header + 4, 0); /* disk # */ + WRITE_16(header + 6, 0); /* disk # */ + WRITE_16(header + 8, entriesZip); /* hack */ + WRITE_16(header + 10, entriesZip); /* hack */ + WRITE_32(header + 12, offsetCD); /* size of CD */ + WRITE_32(header + 16, offset); /* offset to CD */ + WRITE_16(header + 20, comsize); /* comment */ + + /* Header */ + if (fwrite(header, 1, 22, fpOutCD) == 22) { + + /* Comment field */ + if (comsize > 0) { + if ((int)fwrite(comment, 1, comsize, fpOutCD) != comsize) { + err = Z_ERRNO; + } + } + + } else { + err = Z_ERRNO; + } + } + + /* Final merge (file + 
central directory) */ + fclose(fpOutCD); + if (err == Z_OK) { + fpOutCD = fopen(fileOutTmp, "rb"); + if (fpOutCD != NULL) { + int nRead; + char buffer[8192]; + while ( (nRead = (int)fread(buffer, 1, sizeof(buffer), fpOutCD)) > 0) { + if ((int)fwrite(buffer, 1, nRead, fpOut) != nRead) { + err = Z_ERRNO; + break; + } + } + fclose(fpOutCD); + } + } + + /* Close */ + fclose(fpZip); + fclose(fpOut); + + /* Wipe temporary file */ + (void)remove(fileOutTmp); + + /* Number of recovered entries */ + if (err == Z_OK) { + if (nRecovered != NULL) { + *nRecovered = entries; + } + if (bytesRecovered != NULL) { + *bytesRecovered = totalBytes; + } + } + } else { + err = Z_STREAM_ERROR; + } + return err; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.h new file mode 100644 index 0000000..a49a426 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/mztools.h @@ -0,0 +1,37 @@ +/* + Additional tools for Minizip + Code: Xavier Roche '2004 + License: Same as ZLIB (www.gzip.org) +*/ + +#ifndef _zip_tools_H +#define _zip_tools_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#include "unzip.h" + +/* Repair a ZIP file (missing central directory) + file: file to recover + fileOut: output file after recovery + fileOutTmp: temporary file name used for recovery +*/ +extern int ZEXPORT unzRepair(const char* file, + const char* fileOut, + const char* fileOutTmp, + uLong* nRecovered, + uLong* bytesRecovered); + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.c new file mode 100644 index 0000000..ea05b7d --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.c @@ -0,0 +1,1985 @@ +/* unzip.c -- IO for uncompress .zip files using zlib + Version 1.1, February 14h, 2010 + part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) + + Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) + + Modifications of Unzip for Zip64 + Copyright (C) 2007-2008 Even Rouault + + Modifications for Zip64 support on both zip and unzip + Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) + + For more info read MiniZip_info.txt + + + ------------------------------------------------------------------------------------ + Decryption code comes from crypt.c by Info-ZIP but has been greatly reduced in terms of + compatibility with older software. The following is from the original crypt.c. + Code woven in by Terry Thorsen 1/2003. + + Copyright (c) 1990-2000 Info-ZIP. All rights reserved. + + See the accompanying file LICENSE, version 2000-Apr-09 or later + (the contents of which are also included in zip.h) for terms of use. + If, for some reason, all these files are missing, the Info-ZIP license + also may be found at: ftp://ftp.info-zip.org/pub/infozip/license.html + + crypt.c (full version) by Info-ZIP. Last revised: [see crypt.h] + + The encryption/decryption parts of this source code (as opposed to the + non-echoing password parts) were originally written in Europe. The + whole source package can be freely distributed, including from the USA. + (Prior to January 2000, re-export from the US was a violation of US law.) + + This encryption code is a direct transcription of the algorithm from + Roger Schlafly, described by Phil Katz in the file appnote.txt. 
This + file (appnote.txt) is distributed with the PKZIP program (even in the + version without encryption capabilities). + + ------------------------------------------------------------------------------------ + + Changes in unzip.c + + 2007-2008 - Even Rouault - Addition of cpl_unzGetCurrentFileZStreamPos + 2007-2008 - Even Rouault - Decoration of symbol names unz* -> cpl_unz* + 2007-2008 - Even Rouault - Remove old C style function prototypes + 2007-2008 - Even Rouault - Add unzip support for ZIP64 + + Copyright (C) 2007-2008 Even Rouault + + + Oct-2009 - Mathias Svensson - Removed cpl_* from symbol names (Even Rouault added them but since this is now moved to a new project (minizip64) I renamed them again). + Oct-2009 - Mathias Svensson - Fixed problem if uncompressed size was > 4G and compressed size was <4G + should only read the compressed/uncompressed size from the Zip64 format if + the size from normal header was 0xFFFFFFFF + Oct-2009 - Mathias Svensson - Applied some bug fixes from patches received from Gilles Vollant + Oct-2009 - Mathias Svensson - Applied support to unzip files with compression method BZIP2 (bzip2 lib is required) + Patch created by Daniel Borca + + Jan-2010 - back to unzip and minizip 1.0 name scheme, with compatibility layer + + Copyright (C) 1998 - 2010 Gilles Vollant, Even Rouault, Mathias Svensson + +*/ + + +#include +#include +#include + +#ifndef NOUNCRYPT + #define NOUNCRYPT +#endif + +#include "zlib.h" +#include "unzip.h" + +#ifdef STDC +# include +#endif +#ifdef NO_ERRNO_H + extern int errno; +#else +# include +#endif + + +#ifndef local +# define local static +#endif +/* compile with -Dlocal if your debugger can't find static symbols */ + + +#ifndef CASESENSITIVITYDEFAULT_NO +# if !defined(unix) && !defined(CASESENSITIVITYDEFAULT_YES) +# define CASESENSITIVITYDEFAULT_NO +# endif +#endif + + +#ifndef UNZ_BUFSIZE +#define UNZ_BUFSIZE (16384) +#endif + +#ifndef UNZ_MAXFILENAMEINZIP +#define UNZ_MAXFILENAMEINZIP (256) +#endif + +#ifndef ALLOC +# define ALLOC(size) (malloc(size)) +#endif + +#define SIZECENTRALDIRITEM (0x2e) +#define SIZEZIPLOCALHEADER (0x1e) + + +const char unz_copyright[] = + " unzip 1.01 Copyright 1998-2004 Gilles Vollant - http://www.winimage.com/zLibDll"; + +/* unz_file_info64_internal contain internal info about a file in zipfile*/ +typedef struct unz_file_info64_internal_s +{ + ZPOS64_T offset_curfile;/* relative offset of local header 8 bytes */ +} unz_file_info64_internal; + + +/* file_in_zip_read_info_s contain internal information about a file in zipfile, + when reading and decompress it */ +typedef struct +{ + char *read_buffer; /* internal buffer for compressed data */ + z_stream stream; /* zLib stream structure for inflate */ + +#ifdef HAVE_BZIP2 + bz_stream bstream; /* bzLib stream structure for bziped */ +#endif + + ZPOS64_T pos_in_zipfile; /* position in byte on the zipfile, for fseek*/ + uLong stream_initialised; /* flag set if stream structure is initialised*/ + + ZPOS64_T offset_local_extrafield;/* offset of the local extra field */ + uInt size_local_extrafield;/* size of the local extra field */ + ZPOS64_T pos_local_extrafield; /* position in the local extra field in read*/ + ZPOS64_T total_out_64; + + uLong crc32; /* crc32 of all data uncompressed */ + uLong crc32_wait; /* crc32 we must obtain after decompress all */ + ZPOS64_T rest_read_compressed; /* number of byte to be decompressed */ + ZPOS64_T rest_read_uncompressed;/*number of byte to be obtained after decomp*/ + zlib_filefunc64_32_def z_filefunc; + voidpf 
filestream; /* io structure of the zipfile */ + uLong compression_method; /* compression method (0==store) */ + ZPOS64_T byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + int raw; +} file_in_zip64_read_info_s; + + +/* unz64_s contain internal information about the zipfile +*/ +typedef struct +{ + zlib_filefunc64_32_def z_filefunc; + int is64bitOpenFunction; + voidpf filestream; /* io structure of the zipfile */ + unz_global_info64 gi; /* public global information */ + ZPOS64_T byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/ + ZPOS64_T num_file; /* number of the current file in the zipfile*/ + ZPOS64_T pos_in_central_dir; /* pos of the current file in the central dir*/ + ZPOS64_T current_file_ok; /* flag about the usability of the current file*/ + ZPOS64_T central_pos; /* position of the beginning of the central dir*/ + + ZPOS64_T size_central_dir; /* size of the central directory */ + ZPOS64_T offset_central_dir; /* offset of start of central directory with + respect to the starting disk number */ + + unz_file_info64 cur_file_info; /* public info about the current file in zip*/ + unz_file_info64_internal cur_file_info_internal; /* private info about it*/ + file_in_zip64_read_info_s* pfile_in_zip_read; /* structure about the current + file if we are decompressing it */ + int encrypted; + + int isZip64; + +# ifndef NOUNCRYPT + unsigned long keys[3]; /* keys defining the pseudo-random sequence */ + const z_crc_t* pcrc_32_tab; +# endif +} unz64_s; + + +#ifndef NOUNCRYPT +#include "crypt.h" +#endif + + +/* =========================================================================== + Reads a long in LSB order from the given gz_stream. Sets +*/ + +local int unz64local_getShort(const zlib_filefunc64_32_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX) { + unsigned char c[2]; + int err = (int)ZREAD64(*pzlib_filefunc_def,filestream,c,2); + if (err==2) + { + *pX = c[0] | ((uLong)c[1] << 8); + return UNZ_OK; + } + else + { + *pX = 0; + if (ZERROR64(*pzlib_filefunc_def,filestream)) + return UNZ_ERRNO; + else + return UNZ_EOF; + } +} + +local int unz64local_getLong(const zlib_filefunc64_32_def* pzlib_filefunc_def, + voidpf filestream, + uLong *pX) { + unsigned char c[4]; + int err = (int)ZREAD64(*pzlib_filefunc_def,filestream,c,4); + if (err==4) + { + *pX = c[0] | ((uLong)c[1] << 8) | ((uLong)c[2] << 16) | ((uLong)c[3] << 24); + return UNZ_OK; + } + else + { + *pX = 0; + if (ZERROR64(*pzlib_filefunc_def,filestream)) + return UNZ_ERRNO; + else + return UNZ_EOF; + } +} + + +local int unz64local_getLong64(const zlib_filefunc64_32_def* pzlib_filefunc_def, + voidpf filestream, + ZPOS64_T *pX) { + unsigned char c[8]; + int err = (int)ZREAD64(*pzlib_filefunc_def,filestream,c,8); + if (err==8) + { + *pX = c[0] | ((ZPOS64_T)c[1] << 8) | ((ZPOS64_T)c[2] << 16) | ((ZPOS64_T)c[3] << 24) + | ((ZPOS64_T)c[4] << 32) | ((ZPOS64_T)c[5] << 40) | ((ZPOS64_T)c[6] << 48) | ((ZPOS64_T)c[7] << 56); + return UNZ_OK; + } + else + { + *pX = 0; + if (ZERROR64(*pzlib_filefunc_def,filestream)) + return UNZ_ERRNO; + else + return UNZ_EOF; + } +} + +/* My own strcmpi / strcasecmp */ +local int strcmpcasenosensitive_internal(const char* fileName1, const char* fileName2) { + for (;;) + { + char c1=*(fileName1++); + char c2=*(fileName2++); + if ((c1>='a') && (c1<='z')) + c1 -= 0x20; + if ((c2>='a') && (c2<='z')) + c2 -= 0x20; + if (c1=='\0') + return ((c2=='\0') ? 
0 : -1); + if (c2=='\0') + return 1; + if (c1c2) + return 1; + } +} + + +#ifdef CASESENSITIVITYDEFAULT_NO +#define CASESENSITIVITYDEFAULTVALUE 2 +#else +#define CASESENSITIVITYDEFAULTVALUE 1 +#endif + +#ifndef STRCMPCASENOSENTIVEFUNCTION +#define STRCMPCASENOSENTIVEFUNCTION strcmpcasenosensitive_internal +#endif + +/* + Compare two filenames (fileName1,fileName2). + If iCaseSensitivity = 1, comparison is case sensitive (like strcmp) + If iCaseSensitivity = 2, comparison is not case sensitive (like strcmpi + or strcasecmp) + If iCaseSensitivity = 0, case sensitivity is default of your operating system + (like 1 on Unix, 2 on Windows) + +*/ +extern int ZEXPORT unzStringFileNameCompare (const char* fileName1, + const char* fileName2, + int iCaseSensitivity) { + if (iCaseSensitivity==0) + iCaseSensitivity=CASESENSITIVITYDEFAULTVALUE; + + if (iCaseSensitivity==1) + return strcmp(fileName1,fileName2); + + return STRCMPCASENOSENTIVEFUNCTION(fileName1,fileName2); +} + +#ifndef BUFREADCOMMENT +#define BUFREADCOMMENT (0x400) +#endif + +#ifndef CENTRALDIRINVALID +#define CENTRALDIRINVALID ((ZPOS64_T)(-1)) +#endif + +/* + Locate the Central directory of a zipfile (at the end, just before + the global comment) +*/ +local ZPOS64_T unz64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream) { + unsigned char* buf; + ZPOS64_T uSizeFile; + ZPOS64_T uBackRead; + ZPOS64_T uMaxBack=0xffff; /* maximum size of global comment */ + ZPOS64_T uPosFound=CENTRALDIRINVALID; + + if (ZSEEK64(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0) + return CENTRALDIRINVALID; + + + uSizeFile = ZTELL64(*pzlib_filefunc_def,filestream); + + if (uMaxBack>uSizeFile) + uMaxBack = uSizeFile; + + buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4); + if (buf==NULL) + return CENTRALDIRINVALID; + + uBackRead = 4; + while (uBackReaduMaxBack) + uBackRead = uMaxBack; + else + uBackRead+=BUFREADCOMMENT; + uReadPos = uSizeFile-uBackRead ; + + uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ? + (BUFREADCOMMENT+4) : (uLong)(uSizeFile-uReadPos); + if (ZSEEK64(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0) + break; + + if (ZREAD64(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize) + break; + + for (i=(int)uReadSize-3; (i--)>0;) + if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && + ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06)) + { + uPosFound = uReadPos+(unsigned)i; + break; + } + + if (uPosFound!=CENTRALDIRINVALID) + break; + } + free(buf); + return uPosFound; +} + + +/* + Locate the Central directory 64 of a zipfile (at the end, just before + the global comment) +*/ +local ZPOS64_T unz64local_SearchCentralDir64(const zlib_filefunc64_32_def* pzlib_filefunc_def, + voidpf filestream) { + unsigned char* buf; + ZPOS64_T uSizeFile; + ZPOS64_T uBackRead; + ZPOS64_T uMaxBack=0xffff; /* maximum size of global comment */ + ZPOS64_T uPosFound=CENTRALDIRINVALID; + uLong uL; + ZPOS64_T relativeOffset; + + if (ZSEEK64(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0) + return CENTRALDIRINVALID; + + + uSizeFile = ZTELL64(*pzlib_filefunc_def,filestream); + + if (uMaxBack>uSizeFile) + uMaxBack = uSizeFile; + + buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4); + if (buf==NULL) + return CENTRALDIRINVALID; + + uBackRead = 4; + while (uBackReaduMaxBack) + uBackRead = uMaxBack; + else + uBackRead+=BUFREADCOMMENT; + uReadPos = uSizeFile-uBackRead ; + + uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ? 
+ (BUFREADCOMMENT+4) : (uLong)(uSizeFile-uReadPos); + if (ZSEEK64(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0) + break; + + if (ZREAD64(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize) + break; + + for (i=(int)uReadSize-3; (i--)>0;) + if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && + ((*(buf+i+2))==0x06) && ((*(buf+i+3))==0x07)) + { + uPosFound = uReadPos+(unsigned)i; + break; + } + + if (uPosFound!=CENTRALDIRINVALID) + break; + } + free(buf); + if (uPosFound == CENTRALDIRINVALID) + return CENTRALDIRINVALID; + + /* Zip64 end of central directory locator */ + if (ZSEEK64(*pzlib_filefunc_def,filestream, uPosFound,ZLIB_FILEFUNC_SEEK_SET)!=0) + return CENTRALDIRINVALID; + + /* the signature, already checked */ + if (unz64local_getLong(pzlib_filefunc_def,filestream,&uL)!=UNZ_OK) + return CENTRALDIRINVALID; + + /* number of the disk with the start of the zip64 end of central directory */ + if (unz64local_getLong(pzlib_filefunc_def,filestream,&uL)!=UNZ_OK) + return CENTRALDIRINVALID; + if (uL != 0) + return CENTRALDIRINVALID; + + /* relative offset of the zip64 end of central directory record */ + if (unz64local_getLong64(pzlib_filefunc_def,filestream,&relativeOffset)!=UNZ_OK) + return CENTRALDIRINVALID; + + /* total number of disks */ + if (unz64local_getLong(pzlib_filefunc_def,filestream,&uL)!=UNZ_OK) + return CENTRALDIRINVALID; + if (uL != 1) + return CENTRALDIRINVALID; + + /* Goto end of central directory record */ + if (ZSEEK64(*pzlib_filefunc_def,filestream, relativeOffset,ZLIB_FILEFUNC_SEEK_SET)!=0) + return CENTRALDIRINVALID; + + /* the signature */ + if (unz64local_getLong(pzlib_filefunc_def,filestream,&uL)!=UNZ_OK) + return CENTRALDIRINVALID; + + if (uL != 0x06064b50) + return CENTRALDIRINVALID; + + return relativeOffset; +} + +/* + Open a Zip file. path contain the full pathname (by example, + on a Windows NT computer "c:\\test\\zlib114.zip" or on an Unix computer + "zlib/zlib114.zip". + If the zipfile cannot be opened (file doesn't exist or in not valid), the + return value is NULL. + Else, the return value is a unzFile Handle, usable with other function + of this unzip package. 
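+
+  A minimal usage sketch (illustrative; "archive.zip" is a placeholder name
+  and unzOpen64 is the public wrapper declared in unzip.h):
+
+    unzFile uf = unzOpen64("archive.zip");
+    if (uf != NULL)
+    {
+      unz_global_info64 gi;
+      if (unzGetGlobalInfo64(uf, &gi) == UNZ_OK)
+        printf("%lu entries\n", (unsigned long)gi.number_entry);
+      unzClose(uf);
+    }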
+*/ +local unzFile unzOpenInternal(const void *path, + zlib_filefunc64_32_def* pzlib_filefunc64_32_def, + int is64bitOpenFunction) { + unz64_s us; + unz64_s *s; + ZPOS64_T central_pos; + uLong uL; + + uLong number_disk; /* number of the current disk, used for + spanning ZIP, unsupported, always 0*/ + uLong number_disk_with_CD; /* number the disk with central dir, used + for spanning ZIP, unsupported, always 0*/ + ZPOS64_T number_entry_CD; /* total number of entries in + the central dir + (same than number_entry on nospan) */ + + int err=UNZ_OK; + + if (unz_copyright[0]!=' ') + return NULL; + + us.z_filefunc.zseek32_file = NULL; + us.z_filefunc.ztell32_file = NULL; + if (pzlib_filefunc64_32_def==NULL) + fill_fopen64_filefunc(&us.z_filefunc.zfile_func64); + else + us.z_filefunc = *pzlib_filefunc64_32_def; + us.is64bitOpenFunction = is64bitOpenFunction; + + + + us.filestream = ZOPEN64(us.z_filefunc, + path, + ZLIB_FILEFUNC_MODE_READ | + ZLIB_FILEFUNC_MODE_EXISTING); + if (us.filestream==NULL) + return NULL; + + central_pos = unz64local_SearchCentralDir64(&us.z_filefunc,us.filestream); + if (central_pos!=CENTRALDIRINVALID) + { + uLong uS; + ZPOS64_T uL64; + + us.isZip64 = 1; + + if (ZSEEK64(us.z_filefunc, us.filestream, + central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=UNZ_ERRNO; + + /* the signature, already checked */ + if (unz64local_getLong(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK) + err=UNZ_ERRNO; + + /* size of zip64 end of central directory record */ + if (unz64local_getLong64(&us.z_filefunc, us.filestream,&uL64)!=UNZ_OK) + err=UNZ_ERRNO; + + /* version made by */ + if (unz64local_getShort(&us.z_filefunc, us.filestream,&uS)!=UNZ_OK) + err=UNZ_ERRNO; + + /* version needed to extract */ + if (unz64local_getShort(&us.z_filefunc, us.filestream,&uS)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of this disk */ + if (unz64local_getLong(&us.z_filefunc, us.filestream,&number_disk)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of the disk with the start of the central directory */ + if (unz64local_getLong(&us.z_filefunc, us.filestream,&number_disk_with_CD)!=UNZ_OK) + err=UNZ_ERRNO; + + /* total number of entries in the central directory on this disk */ + if (unz64local_getLong64(&us.z_filefunc, us.filestream,&us.gi.number_entry)!=UNZ_OK) + err=UNZ_ERRNO; + + /* total number of entries in the central directory */ + if (unz64local_getLong64(&us.z_filefunc, us.filestream,&number_entry_CD)!=UNZ_OK) + err=UNZ_ERRNO; + + if ((number_entry_CD!=us.gi.number_entry) || + (number_disk_with_CD!=0) || + (number_disk!=0)) + err=UNZ_BADZIPFILE; + + /* size of the central directory */ + if (unz64local_getLong64(&us.z_filefunc, us.filestream,&us.size_central_dir)!=UNZ_OK) + err=UNZ_ERRNO; + + /* offset of start of central directory with respect to the + starting disk number */ + if (unz64local_getLong64(&us.z_filefunc, us.filestream,&us.offset_central_dir)!=UNZ_OK) + err=UNZ_ERRNO; + + us.gi.size_comment = 0; + } + else + { + central_pos = unz64local_SearchCentralDir(&us.z_filefunc,us.filestream); + if (central_pos==CENTRALDIRINVALID) + err=UNZ_ERRNO; + + us.isZip64 = 0; + + if (ZSEEK64(us.z_filefunc, us.filestream, + central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0) + err=UNZ_ERRNO; + + /* the signature, already checked */ + if (unz64local_getLong(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of this disk */ + if (unz64local_getShort(&us.z_filefunc, us.filestream,&number_disk)!=UNZ_OK) + err=UNZ_ERRNO; + + /* number of the disk with the start of the central directory */ + if 
(unz64local_getShort(&us.z_filefunc, us.filestream,&number_disk_with_CD)!=UNZ_OK)
+            err=UNZ_ERRNO;
+
+        /* total number of entries in the central dir on this disk */
+        if (unz64local_getShort(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK)
+            err=UNZ_ERRNO;
+        us.gi.number_entry = uL;
+
+        /* total number of entries in the central dir */
+        if (unz64local_getShort(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK)
+            err=UNZ_ERRNO;
+        number_entry_CD = uL;
+
+        if ((number_entry_CD!=us.gi.number_entry) ||
+            (number_disk_with_CD!=0) ||
+            (number_disk!=0))
+            err=UNZ_BADZIPFILE;
+
+        /* size of the central directory */
+        if (unz64local_getLong(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK)
+            err=UNZ_ERRNO;
+        us.size_central_dir = uL;
+
+        /* offset of start of central directory with respect to the
+           starting disk number */
+        if (unz64local_getLong(&us.z_filefunc, us.filestream,&uL)!=UNZ_OK)
+            err=UNZ_ERRNO;
+        us.offset_central_dir = uL;
+
+        /* zipfile comment length */
+        if (unz64local_getShort(&us.z_filefunc, us.filestream,&us.gi.size_comment)!=UNZ_OK)
+            err=UNZ_ERRNO;
+    }
+
+    if ((central_pos<us.offset_central_dir+us.size_central_dir) &&
+        (err==UNZ_OK))
+        err=UNZ_BADZIPFILE;
+
+    if (err!=UNZ_OK)
+    {
+        ZCLOSE64(us.z_filefunc, us.filestream);
+        return NULL;
+    }
+
+    us.byte_before_the_zipfile = central_pos -
+                            (us.offset_central_dir+us.size_central_dir);
+    us.central_pos = central_pos;
+    us.pfile_in_zip_read = NULL;
+    us.encrypted = 0;
+
+
+    s=(unz64_s*)ALLOC(sizeof(unz64_s));
+    if( s != NULL)
+    {
+        *s=us;
+        unzGoToFirstFile((unzFile)s);
+    }
+    return (unzFile)s;
+}
+
+
+extern unzFile ZEXPORT unzOpen2(const char *path,
+                                zlib_filefunc_def* pzlib_filefunc32_def) {
+    if (pzlib_filefunc32_def != NULL)
+    {
+        zlib_filefunc64_32_def zlib_filefunc64_32_def_fill;
+        fill_zlib_filefunc64_32_def_from_filefunc32(&zlib_filefunc64_32_def_fill,pzlib_filefunc32_def);
+        return unzOpenInternal(path, &zlib_filefunc64_32_def_fill, 0);
+    }
+    else
+        return unzOpenInternal(path, NULL, 0);
+}
+
+extern unzFile ZEXPORT unzOpen2_64(const void *path,
+                                   zlib_filefunc64_def* pzlib_filefunc_def) {
+    if (pzlib_filefunc_def != NULL)
+    {
+        zlib_filefunc64_32_def zlib_filefunc64_32_def_fill;
+        fill_zlib_filefunc64_32_def_from_filefunc64(&zlib_filefunc64_32_def_fill, pzlib_filefunc_def);
+        return unzOpenInternal(path, &zlib_filefunc64_32_def_fill, 1);
+    }
+    else
+        return unzOpenInternal(path, NULL, 1);
+}
+
+extern unzFile ZEXPORT unzOpen(const char *path) {
+    return unzOpenInternal(path, NULL, 0);
+}
+
+extern unzFile ZEXPORT unzOpen64(const void *path) {
+    return unzOpenInternal(path, NULL, 1);
+}
+
+/*
+  Close a ZipFile opened with unzOpen.
+  If there are files inside the .Zip opened with unzOpenCurrentFile (see later),
+    these files MUST be closed with unzCloseCurrentFile before calling unzClose.
+  return UNZ_OK if there is no problem. */
+extern int ZEXPORT unzClose(unzFile file) {
+    unz64_s* s;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+
+    if (s->pfile_in_zip_read!=NULL)
+        unzCloseCurrentFile(file);
+
+    ZCLOSE64(s->z_filefunc, s->filestream);
+    free(s);
+    return UNZ_OK;
+}
+
+
+/*
+  Write info about the ZipFile in the *pglobal_info structure.
+  No preparation of the structure is needed
+  return UNZ_OK if there is no problem. */
+extern int ZEXPORT unzGetGlobalInfo64(unzFile file, unz_global_info64* pglobal_info) {
+    unz64_s* s;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    *pglobal_info=s->gi;
+    return UNZ_OK;
+}
+
+extern int ZEXPORT unzGetGlobalInfo(unzFile file, unz_global_info* pglobal_info32) {
+    unz64_s* s;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    /* to do : check if number_entry is not truncated */
+    pglobal_info32->number_entry = (uLong)s->gi.number_entry;
+    pglobal_info32->size_comment = s->gi.size_comment;
+    return UNZ_OK;
+}
+/*
+   Translate date/time from DOS format to tm_unz (more easily readable)
+*/
+local void unz64local_DosDateToTmuDate(ZPOS64_T ulDosDate, tm_unz* ptm) {
+    ZPOS64_T uDate;
+    uDate = (ZPOS64_T)(ulDosDate>>16);
+    ptm->tm_mday = (int)(uDate&0x1f) ;
+    ptm->tm_mon =  (int)((((uDate)&0x1E0)/0x20)-1) ;
+    ptm->tm_year = (int)(((uDate&0x0FE00)/0x0200)+1980) ;
+
+    ptm->tm_hour = (int) ((ulDosDate &0xF800)/0x800);
+    ptm->tm_min =  (int) ((ulDosDate&0x7E0)/0x20) ;
+    ptm->tm_sec =  (int) (2*(ulDosDate&0x1f)) ;
+}
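+/* Editor's note: a minimal standalone sketch (not part of the original
+   minizip source) of the MS-DOS date/time bit layout that
+   unz64local_DosDateToTmuDate above decodes.  The packed value and the
+   UNZ_DOSDATE_DEMO guard are hypothetical, for illustration only. */
+#ifdef UNZ_DOSDATE_DEMO
+#include <stdio.h>
+int main(void) {
+    unsigned long dosDate = 0x5A218A81UL;     /* hypothetical packed value */
+    unsigned long uDate   = dosDate >> 16;    /* date lives in the high word */
+    printf("%lu-%02lu-%02lu %02lu:%02lu:%02lu\n",
+           ((uDate & 0x0FE00) >> 9) + 1980,   /* year  : 7 bits, 1980-based */
+           (uDate & 0x1E0) >> 5,              /* month : 4 bits, 1..12      */
+           uDate & 0x1f,                      /* day   : 5 bits, 1..31      */
+           (dosDate & 0xF800) >> 11,          /* hour  : 5 bits             */
+           (dosDate & 0x7E0) >> 5,            /* min   : 6 bits             */
+           2 * (dosDate & 0x1f));             /* sec   : 5 bits, 2s units   */
+    return 0;
+}
+#endif /* UNZ_DOSDATE_DEMO */
+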
+/*
+  Get Info about the current file in the zipfile, with internal only info
+*/
+local int unz64local_GetCurrentFileInfoInternal(unzFile file,
+                                                unz_file_info64 *pfile_info,
+                                                unz_file_info64_internal
+                                                *pfile_info_internal,
+                                                char *szFileName,
+                                                uLong fileNameBufferSize,
+                                                void *extraField,
+                                                uLong extraFieldBufferSize,
+                                                char *szComment,
+                                                uLong commentBufferSize) {
+    unz64_s* s;
+    unz_file_info64 file_info;
+    unz_file_info64_internal file_info_internal;
+    int err=UNZ_OK;
+    uLong uMagic;
+    long lSeek=0;
+    uLong uL;
+
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    if (ZSEEK64(s->z_filefunc, s->filestream,
+              s->pos_in_central_dir+s->byte_before_the_zipfile,
+              ZLIB_FILEFUNC_SEEK_SET)!=0)
+        err=UNZ_ERRNO;
+
+
+    /* we check the magic */
+    if (err==UNZ_OK)
+    {
+        if (unz64local_getLong(&s->z_filefunc, s->filestream,&uMagic) != UNZ_OK)
+            err=UNZ_ERRNO;
+        else if (uMagic!=0x02014b50)
+            err=UNZ_BADZIPFILE;
+    }
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.version) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.version_needed) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.flag) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.compression_method) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getLong(&s->z_filefunc, s->filestream,&file_info.dosDate) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    unz64local_DosDateToTmuDate(file_info.dosDate,&file_info.tmu_date);
+
+    if (unz64local_getLong(&s->z_filefunc, s->filestream,&file_info.crc) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getLong(&s->z_filefunc, s->filestream,&uL) != UNZ_OK)
+        err=UNZ_ERRNO;
+    file_info.compressed_size = uL;
+
+    if (unz64local_getLong(&s->z_filefunc, s->filestream,&uL) != UNZ_OK)
+        err=UNZ_ERRNO;
+    file_info.uncompressed_size = uL;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.size_filename) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_extra) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.size_file_comment) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.disk_num_start) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getShort(&s->z_filefunc, s->filestream,&file_info.internal_fa) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    if (unz64local_getLong(&s->z_filefunc, s->filestream,&file_info.external_fa) != UNZ_OK)
+        err=UNZ_ERRNO;
+
+    // relative offset of local header
+    if (unz64local_getLong(&s->z_filefunc, s->filestream,&uL) != UNZ_OK)
+        err=UNZ_ERRNO;
+    file_info_internal.offset_curfile = uL;
+
+    lSeek+=file_info.size_filename;
+    if ((err==UNZ_OK) && (szFileName!=NULL))
+    {
+        uLong uSizeRead ;
+        if (file_info.size_filename<fileNameBufferSize)
+        {
+            *(szFileName+file_info.size_filename)='\0';
+            uSizeRead = file_info.size_filename;
+        }
+        else
+            uSizeRead = fileNameBufferSize;
+
+        if ((file_info.size_filename>0) && (fileNameBufferSize>0))
+            if (ZREAD64(s->z_filefunc, s->filestream,szFileName,uSizeRead)!=uSizeRead)
+                err=UNZ_ERRNO;
+        lSeek -= uSizeRead;
+    }
+
+    // Read extrafield
+    if ((err==UNZ_OK) && (extraField!=NULL))
+    {
+        ZPOS64_T uSizeRead ;
+        if (file_info.size_file_extra<extraFieldBufferSize)
+            uSizeRead = file_info.size_file_extra;
+        else
+            uSizeRead = extraFieldBufferSize;
+
+        if (lSeek!=0)
+        {
+            if (ZSEEK64(s->z_filefunc, s->filestream,(ZPOS64_T)lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0)
+                lSeek=0;
+            else
+                err=UNZ_ERRNO;
+        }
+
+        if ((file_info.size_file_extra>0) && (extraFieldBufferSize>0))
+            if (ZREAD64(s->z_filefunc, s->filestream,extraField,(uLong)uSizeRead)!=uSizeRead)
+                err=UNZ_ERRNO;
+
+        lSeek += file_info.size_file_extra - (uLong)uSizeRead;
+    }
+    else
+        lSeek += file_info.size_file_extra;
+
+
+    if ((err==UNZ_OK) && (file_info.size_file_extra != 0))
+    {
+        uLong acc = 0;
+
+        // since lSeek now points to after the extra field we need to move back
+        lSeek -= file_info.size_file_extra;
+
+        if (lSeek!=0)
+        {
+            if (ZSEEK64(s->z_filefunc, s->filestream,(ZPOS64_T)lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0)
+                lSeek=0;
+            else
+                err=UNZ_ERRNO;
+        }
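+        /* Editor's note (comment added, not in the original source): each
+           extra-field record parsed below is laid out as
+               2 bytes  header id  (0x0001 = ZIP64 extended information)
+               2 bytes  data size  (length of the payload that follows)
+               n bytes  payload
+           The ZIP64 payload carries only the fields whose 32-bit (or 16-bit)
+           counterparts in the central directory were saturated to 0xFFFFFFFF
+           (or 0xFFFF), in this fixed order: uncompressed size, compressed
+           size, local header offset, disk start number. */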
+        while(acc < file_info.size_file_extra)
+        {
+            uLong headerId;
+            uLong dataSize;
+
+            if (unz64local_getShort(&s->z_filefunc, s->filestream,&headerId) != UNZ_OK)
+                err=UNZ_ERRNO;
+
+            if (unz64local_getShort(&s->z_filefunc, s->filestream,&dataSize) != UNZ_OK)
+                err=UNZ_ERRNO;
+
+            /* ZIP64 extra fields */
+            if (headerId == 0x0001)
+            {
+                if(file_info.uncompressed_size == MAXU32)
+                {
+                    if (unz64local_getLong64(&s->z_filefunc, s->filestream,&file_info.uncompressed_size) != UNZ_OK)
+                        err=UNZ_ERRNO;
+                }
+
+                if(file_info.compressed_size == MAXU32)
+                {
+                    if (unz64local_getLong64(&s->z_filefunc, s->filestream,&file_info.compressed_size) != UNZ_OK)
+                        err=UNZ_ERRNO;
+                }
+
+                if(file_info_internal.offset_curfile == MAXU32)
+                {
+                    /* Relative Header offset */
+                    if (unz64local_getLong64(&s->z_filefunc, s->filestream,&file_info_internal.offset_curfile) != UNZ_OK)
+                        err=UNZ_ERRNO;
+                }
+
+                if(file_info.disk_num_start == 0xffff)
+                {
+                    /* Disk Start Number */
+                    if (unz64local_getLong(&s->z_filefunc, s->filestream,&file_info.disk_num_start) != UNZ_OK)
+                        err=UNZ_ERRNO;
+                }
+
+            }
+            else
+            {
+                if (ZSEEK64(s->z_filefunc, s->filestream,dataSize,ZLIB_FILEFUNC_SEEK_CUR)!=0)
+                    err=UNZ_ERRNO;
+            }
+
+            acc += 2 + 2 + dataSize;
+        }
+    }
+
+    if ((err==UNZ_OK) && (szComment!=NULL))
+    {
+        uLong uSizeRead ;
+        if (file_info.size_file_comment<commentBufferSize)
+        {
+            *(szComment+file_info.size_file_comment)='\0';
+            uSizeRead = file_info.size_file_comment;
+        }
+        else
+            uSizeRead = commentBufferSize;
+
+        if (lSeek!=0)
+        {
+            if (ZSEEK64(s->z_filefunc, s->filestream,(ZPOS64_T)lSeek,ZLIB_FILEFUNC_SEEK_CUR)==0)
+                lSeek=0;
+            else
+                err=UNZ_ERRNO;
+        }
+
+        if ((file_info.size_file_comment>0) && (commentBufferSize>0))
+            if (ZREAD64(s->z_filefunc, s->filestream,szComment,uSizeRead)!=uSizeRead)
+                err=UNZ_ERRNO;
+        lSeek+=file_info.size_file_comment - uSizeRead;
+    }
+    else
+        lSeek+=file_info.size_file_comment;
+
+
+    if ((err==UNZ_OK) && (pfile_info!=NULL))
+        *pfile_info=file_info;
+
+    if ((err==UNZ_OK) && (pfile_info_internal!=NULL))
+        *pfile_info_internal=file_info_internal;
+
+    return err;
+}
+
+
+
+/*
+  Get Info about the current file in the *pfile_info structure.
+  No preparation of the structure is needed
+  return UNZ_OK if there is no problem.
+*/
+extern int ZEXPORT unzGetCurrentFileInfo64(unzFile file,
+                                           unz_file_info64 * pfile_info,
+                                           char * szFileName, uLong fileNameBufferSize,
+                                           void *extraField, uLong extraFieldBufferSize,
+                                           char* szComment, uLong commentBufferSize) {
+    return unz64local_GetCurrentFileInfoInternal(file,pfile_info,NULL,
+                                                 szFileName,fileNameBufferSize,
+                                                 extraField,extraFieldBufferSize,
+                                                 szComment,commentBufferSize);
+}
+
+extern int ZEXPORT unzGetCurrentFileInfo(unzFile file,
+                                         unz_file_info * pfile_info,
+                                         char * szFileName, uLong fileNameBufferSize,
+                                         void *extraField, uLong extraFieldBufferSize,
+                                         char* szComment, uLong commentBufferSize) {
+    int err;
+    unz_file_info64 file_info64;
+    err = unz64local_GetCurrentFileInfoInternal(file,&file_info64,NULL,
+                                                szFileName,fileNameBufferSize,
+                                                extraField,extraFieldBufferSize,
+                                                szComment,commentBufferSize);
+    if ((err==UNZ_OK) && (pfile_info != NULL))
+    {
+        pfile_info->version = file_info64.version;
+        pfile_info->version_needed = file_info64.version_needed;
+        pfile_info->flag = file_info64.flag;
+        pfile_info->compression_method = file_info64.compression_method;
+        pfile_info->dosDate = file_info64.dosDate;
+        pfile_info->crc = file_info64.crc;
+
+        pfile_info->size_filename = file_info64.size_filename;
+        pfile_info->size_file_extra = file_info64.size_file_extra;
+        pfile_info->size_file_comment = file_info64.size_file_comment;
+
+        pfile_info->disk_num_start = file_info64.disk_num_start;
+        pfile_info->internal_fa = file_info64.internal_fa;
+        pfile_info->external_fa = file_info64.external_fa;
+
+        pfile_info->tmu_date = file_info64.tmu_date;
+
+
+        pfile_info->compressed_size = (uLong)file_info64.compressed_size;
+        pfile_info->uncompressed_size = (uLong)file_info64.uncompressed_size;
+
+    }
+    return err;
+}
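+/* Editor's note: a minimal usage sketch (not part of the original minizip
+   source) of the query API above together with the iteration API defined
+   below: list every entry in an archive.  "archive.zip" and the
+   UNZ_LIST_DEMO guard are hypothetical, for illustration only. */
+#ifdef UNZ_LIST_DEMO
+#include <stdio.h>
+int main(void) {
+    unzFile uf = unzOpen64("archive.zip");
+    int err;
+    if (uf == NULL)
+        return 1;
+    for (err = unzGoToFirstFile(uf); err == UNZ_OK; err = unzGoToNextFile(uf))
+    {
+        char name[256];
+        unz_file_info64 info;
+        if (unzGetCurrentFileInfo64(uf, &info, name, sizeof(name),
+                                    NULL, 0, NULL, 0) == UNZ_OK)
+            printf("%s (%lu bytes)\n", name,
+                   (unsigned long)info.uncompressed_size);
+    }
+    unzClose(uf);
+    return 0;
+}
+#endif /* UNZ_LIST_DEMO */
+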
+/*
+  Set the current file of the zipfile to the first file.
+  return UNZ_OK if there is no problem
+*/
+extern int ZEXPORT unzGoToFirstFile(unzFile file) {
+    int err=UNZ_OK;
+    unz64_s* s;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    s->pos_in_central_dir=s->offset_central_dir;
+    s->num_file=0;
+    err=unz64local_GetCurrentFileInfoInternal(file,&s->cur_file_info,
+                                              &s->cur_file_info_internal,
+                                              NULL,0,NULL,0,NULL,0);
+    s->current_file_ok = (err == UNZ_OK);
+    return err;
+}
+
+/*
+  Set the current file of the zipfile to the next file.
+  return UNZ_OK if there is no problem
+  return UNZ_END_OF_LIST_OF_FILE if the current file was the last one.
+*/
+extern int ZEXPORT unzGoToNextFile(unzFile file) {
+    unz64_s* s;
+    int err;
+
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    if (!s->current_file_ok)
+        return UNZ_END_OF_LIST_OF_FILE;
+    if (s->gi.number_entry != 0xffff)    /* 2^16 files overflow hack */
+      if (s->num_file+1==s->gi.number_entry)
+        return UNZ_END_OF_LIST_OF_FILE;
+
+    s->pos_in_central_dir += SIZECENTRALDIRITEM + s->cur_file_info.size_filename +
+            s->cur_file_info.size_file_extra + s->cur_file_info.size_file_comment ;
+    s->num_file++;
+    err = unz64local_GetCurrentFileInfoInternal(file,&s->cur_file_info,
+                                                &s->cur_file_info_internal,
+                                                NULL,0,NULL,0,NULL,0);
+    s->current_file_ok = (err == UNZ_OK);
+    return err;
+}
+
+
+/*
+  Try to locate the file szFileName in the zipfile.
+  For the meaning of iCaseSensitivity, see unzStringFileNameCompare
+
+  return value :
+  UNZ_OK if the file is found. It becomes the current file.
+  UNZ_END_OF_LIST_OF_FILE if the file is not found
+*/
+extern int ZEXPORT unzLocateFile(unzFile file, const char *szFileName, int iCaseSensitivity) {
+    unz64_s* s;
+    int err;
+
+    /* We remember the 'current' position in the file so that we can jump
+     * back there if we fail.
+     */
+    unz_file_info64 cur_file_infoSaved;
+    unz_file_info64_internal cur_file_info_internalSaved;
+    ZPOS64_T num_fileSaved;
+    ZPOS64_T pos_in_central_dirSaved;
+
+
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+
+    if (strlen(szFileName)>=UNZ_MAXFILENAMEINZIP)
+        return UNZ_PARAMERROR;
+
+    s=(unz64_s*)file;
+    if (!s->current_file_ok)
+        return UNZ_END_OF_LIST_OF_FILE;
+
+    /* Save the current state */
+    num_fileSaved = s->num_file;
+    pos_in_central_dirSaved = s->pos_in_central_dir;
+    cur_file_infoSaved = s->cur_file_info;
+    cur_file_info_internalSaved = s->cur_file_info_internal;
+
+    err = unzGoToFirstFile(file);
+
+    while (err == UNZ_OK)
+    {
+        char szCurrentFileName[UNZ_MAXFILENAMEINZIP+1];
+        err = unzGetCurrentFileInfo64(file,NULL,
+                                      szCurrentFileName,sizeof(szCurrentFileName)-1,
+                                      NULL,0,NULL,0);
+        if (err == UNZ_OK)
+        {
+            if (unzStringFileNameCompare(szCurrentFileName,
+                                         szFileName,iCaseSensitivity)==0)
+                return UNZ_OK;
+            err = unzGoToNextFile(file);
+        }
+    }
+
+    /* We failed, so restore the state of the 'current file' to where we
+     * were.
+     */
+    s->num_file = num_fileSaved ;
+    s->pos_in_central_dir = pos_in_central_dirSaved ;
+    s->cur_file_info = cur_file_infoSaved;
+    s->cur_file_info_internal = cur_file_info_internalSaved;
+    return err;
+}
+
+
+/*
+///////////////////////////////////////////
+// Contributed by Ryan Haksi (mailto://cryogen@infoserve.net)
+// I need random access
+//
+// Further optimization could be realized by adding an ability
+// to cache the directory in memory. The goal being a single
+// comprehensive file read to put the file I need in memory.
+*/ + +/* +typedef struct unz_file_pos_s +{ + ZPOS64_T pos_in_zip_directory; // offset in file + ZPOS64_T num_of_file; // # of file +} unz_file_pos; +*/ + +extern int ZEXPORT unzGetFilePos64(unzFile file, unz64_file_pos* file_pos) { + unz64_s* s; + + if (file==NULL || file_pos==NULL) + return UNZ_PARAMERROR; + s=(unz64_s*)file; + if (!s->current_file_ok) + return UNZ_END_OF_LIST_OF_FILE; + + file_pos->pos_in_zip_directory = s->pos_in_central_dir; + file_pos->num_of_file = s->num_file; + + return UNZ_OK; +} + +extern int ZEXPORT unzGetFilePos(unzFile file, unz_file_pos* file_pos) { + unz64_file_pos file_pos64; + int err = unzGetFilePos64(file,&file_pos64); + if (err==UNZ_OK) + { + file_pos->pos_in_zip_directory = (uLong)file_pos64.pos_in_zip_directory; + file_pos->num_of_file = (uLong)file_pos64.num_of_file; + } + return err; +} + +extern int ZEXPORT unzGoToFilePos64(unzFile file, const unz64_file_pos* file_pos) { + unz64_s* s; + int err; + + if (file==NULL || file_pos==NULL) + return UNZ_PARAMERROR; + s=(unz64_s*)file; + + /* jump to the right spot */ + s->pos_in_central_dir = file_pos->pos_in_zip_directory; + s->num_file = file_pos->num_of_file; + + /* set the current file */ + err = unz64local_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + /* return results */ + s->current_file_ok = (err == UNZ_OK); + return err; +} + +extern int ZEXPORT unzGoToFilePos(unzFile file, unz_file_pos* file_pos) { + unz64_file_pos file_pos64; + if (file_pos == NULL) + return UNZ_PARAMERROR; + + file_pos64.pos_in_zip_directory = file_pos->pos_in_zip_directory; + file_pos64.num_of_file = file_pos->num_of_file; + return unzGoToFilePos64(file,&file_pos64); +} + +/* +// Unzip Helper Functions - should be here? +/////////////////////////////////////////// +*/ + +/* + Read the local header of the current zipfile + Check the coherency of the local header and info in the end of central + directory about this file + store in *piSizeVar the size of extra info in local header + (filename and size of extra field data) +*/ +local int unz64local_CheckCurrentFileCoherencyHeader(unz64_s* s, uInt* piSizeVar, + ZPOS64_T * poffset_local_extrafield, + uInt * psize_local_extrafield) { + uLong uMagic,uData,uFlags; + uLong size_filename; + uLong size_extra_field; + int err=UNZ_OK; + + *piSizeVar = 0; + *poffset_local_extrafield = 0; + *psize_local_extrafield = 0; + + if (ZSEEK64(s->z_filefunc, s->filestream,s->cur_file_info_internal.offset_curfile + + s->byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + + if (err==UNZ_OK) + { + if (unz64local_getLong(&s->z_filefunc, s->filestream,&uMagic) != UNZ_OK) + err=UNZ_ERRNO; + else if (uMagic!=0x04034b50) + err=UNZ_BADZIPFILE; + } + + if (unz64local_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) + err=UNZ_ERRNO; +/* + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.wVersion)) + err=UNZ_BADZIPFILE; +*/ + if (unz64local_getShort(&s->z_filefunc, s->filestream,&uFlags) != UNZ_OK) + err=UNZ_ERRNO; + + if (unz64local_getShort(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.compression_method)) + err=UNZ_BADZIPFILE; + + if ((err==UNZ_OK) && (s->cur_file_info.compression_method!=0) && +/* #ifdef HAVE_BZIP2 */ + (s->cur_file_info.compression_method!=Z_BZIP2ED) && +/* #endif */ + (s->cur_file_info.compression_method!=Z_DEFLATED)) + err=UNZ_BADZIPFILE; + + if (unz64local_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* 
date/time */ + err=UNZ_ERRNO; + + if (unz64local_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* crc */ + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (uData!=s->cur_file_info.crc) && ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + if (unz64local_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* size compr */ + err=UNZ_ERRNO; + else if (uData != 0xFFFFFFFF && (err==UNZ_OK) && (uData!=s->cur_file_info.compressed_size) && ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + if (unz64local_getLong(&s->z_filefunc, s->filestream,&uData) != UNZ_OK) /* size uncompr */ + err=UNZ_ERRNO; + else if (uData != 0xFFFFFFFF && (err==UNZ_OK) && (uData!=s->cur_file_info.uncompressed_size) && ((uFlags & 8)==0)) + err=UNZ_BADZIPFILE; + + if (unz64local_getShort(&s->z_filefunc, s->filestream,&size_filename) != UNZ_OK) + err=UNZ_ERRNO; + else if ((err==UNZ_OK) && (size_filename!=s->cur_file_info.size_filename)) + err=UNZ_BADZIPFILE; + + *piSizeVar += (uInt)size_filename; + + if (unz64local_getShort(&s->z_filefunc, s->filestream,&size_extra_field) != UNZ_OK) + err=UNZ_ERRNO; + *poffset_local_extrafield= s->cur_file_info_internal.offset_curfile + + SIZEZIPLOCALHEADER + size_filename; + *psize_local_extrafield = (uInt)size_extra_field; + + *piSizeVar += (uInt)size_extra_field; + + return err; +} + +/* + Open for reading data the current file in the zipfile. + If there is no error and the file is opened, the return value is UNZ_OK. +*/ +extern int ZEXPORT unzOpenCurrentFile3(unzFile file, int* method, + int* level, int raw, const char* password) { + int err=UNZ_OK; + uInt iSizeVar; + unz64_s* s; + file_in_zip64_read_info_s* pfile_in_zip_read_info; + ZPOS64_T offset_local_extrafield; /* offset of the local extra field */ + uInt size_local_extrafield; /* size of the local extra field */ +# ifndef NOUNCRYPT + char source[12]; +# else + if (password != NULL) + return UNZ_PARAMERROR; +# endif + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz64_s*)file; + if (!s->current_file_ok) + return UNZ_PARAMERROR; + + if (s->pfile_in_zip_read != NULL) + unzCloseCurrentFile(file); + + if (unz64local_CheckCurrentFileCoherencyHeader(s,&iSizeVar, &offset_local_extrafield,&size_local_extrafield)!=UNZ_OK) + return UNZ_BADZIPFILE; + + pfile_in_zip_read_info = (file_in_zip64_read_info_s*)ALLOC(sizeof(file_in_zip64_read_info_s)); + if (pfile_in_zip_read_info==NULL) + return UNZ_INTERNALERROR; + + pfile_in_zip_read_info->read_buffer=(char*)ALLOC(UNZ_BUFSIZE); + pfile_in_zip_read_info->offset_local_extrafield = offset_local_extrafield; + pfile_in_zip_read_info->size_local_extrafield = size_local_extrafield; + pfile_in_zip_read_info->pos_local_extrafield=0; + pfile_in_zip_read_info->raw=raw; + + if (pfile_in_zip_read_info->read_buffer==NULL) + { + free(pfile_in_zip_read_info); + return UNZ_INTERNALERROR; + } + + pfile_in_zip_read_info->stream_initialised=0; + + if (method!=NULL) + *method = (int)s->cur_file_info.compression_method; + + if (level!=NULL) + { + *level = 6; + switch (s->cur_file_info.flag & 0x06) + { + case 6 : *level = 1; break; + case 4 : *level = 2; break; + case 2 : *level = 9; break; + } + } + + if ((s->cur_file_info.compression_method!=0) && +/* #ifdef HAVE_BZIP2 */ + (s->cur_file_info.compression_method!=Z_BZIP2ED) && +/* #endif */ + (s->cur_file_info.compression_method!=Z_DEFLATED)) + + err=UNZ_BADZIPFILE; + + pfile_in_zip_read_info->crc32_wait=s->cur_file_info.crc; + pfile_in_zip_read_info->crc32=0; + pfile_in_zip_read_info->total_out_64=0; + pfile_in_zip_read_info->compression_method = 
s->cur_file_info.compression_method; + pfile_in_zip_read_info->filestream=s->filestream; + pfile_in_zip_read_info->z_filefunc=s->z_filefunc; + pfile_in_zip_read_info->byte_before_the_zipfile=s->byte_before_the_zipfile; + + pfile_in_zip_read_info->stream.total_out = 0; + + if ((s->cur_file_info.compression_method==Z_BZIP2ED) && (!raw)) + { +#ifdef HAVE_BZIP2 + pfile_in_zip_read_info->bstream.bzalloc = (void *(*) (void *, int, int))0; + pfile_in_zip_read_info->bstream.bzfree = (free_func)0; + pfile_in_zip_read_info->bstream.opaque = (voidpf)0; + pfile_in_zip_read_info->bstream.state = (voidpf)0; + + pfile_in_zip_read_info->stream.zalloc = (alloc_func)0; + pfile_in_zip_read_info->stream.zfree = (free_func)0; + pfile_in_zip_read_info->stream.opaque = (voidpf)0; + pfile_in_zip_read_info->stream.next_in = (voidpf)0; + pfile_in_zip_read_info->stream.avail_in = 0; + + err=BZ2_bzDecompressInit(&pfile_in_zip_read_info->bstream, 0, 0); + if (err == Z_OK) + pfile_in_zip_read_info->stream_initialised=Z_BZIP2ED; + else + { + free(pfile_in_zip_read_info->read_buffer); + free(pfile_in_zip_read_info); + return err; + } +#else + pfile_in_zip_read_info->raw=1; +#endif + } + else if ((s->cur_file_info.compression_method==Z_DEFLATED) && (!raw)) + { + pfile_in_zip_read_info->stream.zalloc = (alloc_func)0; + pfile_in_zip_read_info->stream.zfree = (free_func)0; + pfile_in_zip_read_info->stream.opaque = (voidpf)0; + pfile_in_zip_read_info->stream.next_in = 0; + pfile_in_zip_read_info->stream.avail_in = 0; + + err=inflateInit2(&pfile_in_zip_read_info->stream, -MAX_WBITS); + if (err == Z_OK) + pfile_in_zip_read_info->stream_initialised=Z_DEFLATED; + else + { + free(pfile_in_zip_read_info->read_buffer); + free(pfile_in_zip_read_info); + return err; + } + /* windowBits is passed < 0 to tell that there is no zlib header. + * Note that in this case inflate *requires* an extra "dummy" byte + * after the compressed stream in order to complete decompression and + * return Z_STREAM_END. 
+         * In unzip, we don't insist on Z_STREAM_END because we know the
+         * size of both the compressed and uncompressed data
+         */
+    }
+    pfile_in_zip_read_info->rest_read_compressed =
+            s->cur_file_info.compressed_size ;
+    pfile_in_zip_read_info->rest_read_uncompressed =
+            s->cur_file_info.uncompressed_size ;
+
+
+    pfile_in_zip_read_info->pos_in_zipfile =
+            s->cur_file_info_internal.offset_curfile + SIZEZIPLOCALHEADER +
+              iSizeVar;
+
+    pfile_in_zip_read_info->stream.avail_in = (uInt)0;
+
+    s->pfile_in_zip_read = pfile_in_zip_read_info;
+    s->encrypted = 0;
+
+#    ifndef NOUNCRYPT
+    if (password != NULL)
+    {
+        int i;
+        s->pcrc_32_tab = get_crc_table();
+        init_keys(password,s->keys,s->pcrc_32_tab);
+        if (ZSEEK64(s->z_filefunc, s->filestream,
+                  s->pfile_in_zip_read->pos_in_zipfile +
+                     s->pfile_in_zip_read->byte_before_the_zipfile,
+                  SEEK_SET)!=0)
+            return UNZ_INTERNALERROR;
+        if(ZREAD64(s->z_filefunc, s->filestream,source, 12)<12)
+            return UNZ_INTERNALERROR;
+
+        for (i = 0; i<12; i++)
+            zdecode(s->keys,s->pcrc_32_tab,source[i]);
+
+        s->pfile_in_zip_read->pos_in_zipfile+=12;
+        s->encrypted=1;
+    }
+#    endif
+
+
+    return UNZ_OK;
+}
+
+extern int ZEXPORT unzOpenCurrentFile(unzFile file) {
+    return unzOpenCurrentFile3(file, NULL, NULL, 0, NULL);
+}
+
+extern int ZEXPORT unzOpenCurrentFilePassword(unzFile file, const char* password) {
+    return unzOpenCurrentFile3(file, NULL, NULL, 0, password);
+}
+
+extern int ZEXPORT unzOpenCurrentFile2(unzFile file, int* method, int* level, int raw) {
+    return unzOpenCurrentFile3(file, method, level, raw, NULL);
+}
+
+/** Addition for GDAL : START */
+
+extern ZPOS64_T ZEXPORT unzGetCurrentFileZStreamPos64(unzFile file) {
+    unz64_s* s;
+    file_in_zip64_read_info_s* pfile_in_zip_read_info;
+    s=(unz64_s*)file;
+    if (file==NULL)
+        return 0; //UNZ_PARAMERROR;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+    if (pfile_in_zip_read_info==NULL)
+        return 0; //UNZ_PARAMERROR;
+    return pfile_in_zip_read_info->pos_in_zipfile +
+                         pfile_in_zip_read_info->byte_before_the_zipfile;
+}
+
+/** Addition for GDAL : END */
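+/* Editor's note: a minimal sketch (not part of the original minizip source)
+   of the typical open/read/close cycle around the functions defined here;
+   it assumes 'uf' is an unzFile already positioned on an entry (e.g. via
+   unzLocateFile).  The UNZ_READ_DEMO guard is hypothetical. */
+#ifdef UNZ_READ_DEMO
+#include <stdio.h>
+static int dump_current_entry(unzFile uf) {
+    char buf[8192];
+    int n, cerr;
+    if (unzOpenCurrentFile(uf) != UNZ_OK)
+        return UNZ_ERRNO;
+    /* unzReadCurrentFile returns >0 (bytes read), 0 (EOF) or <0 (error) */
+    while ((n = unzReadCurrentFile(uf, buf, sizeof(buf))) > 0)
+        fwrite(buf, 1, (size_t)n, stdout);
+    cerr = unzCloseCurrentFile(uf);   /* reports UNZ_CRCERROR on bad CRC */
+    return (n < 0) ? n : cerr;
+}
+#endif /* UNZ_READ_DEMO */
+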
+
+/*
+  Read bytes from the current file.
+  buf contains the buffer where the data must be copied
+  len is the size of buf.
+
+  return the number of bytes copied if some bytes were copied
+  return 0 if the end of file was reached
+  return <0 with an error code if there is an error
+    (UNZ_ERRNO for an IO error, or a zlib error code for an uncompress error)
+*/
+extern int ZEXPORT unzReadCurrentFile(unzFile file, voidp buf, unsigned len) {
+    int err=UNZ_OK;
+    uInt iRead = 0;
+    unz64_s* s;
+    file_in_zip64_read_info_s* pfile_in_zip_read_info;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_PARAMERROR;
+
+
+    if (pfile_in_zip_read_info->read_buffer == NULL)
+        return UNZ_END_OF_LIST_OF_FILE;
+    if (len==0)
+        return 0;
+
+    pfile_in_zip_read_info->stream.next_out = (Bytef*)buf;
+
+    pfile_in_zip_read_info->stream.avail_out = (uInt)len;
+
+    if ((len>pfile_in_zip_read_info->rest_read_uncompressed) &&
+        (!(pfile_in_zip_read_info->raw)))
+        pfile_in_zip_read_info->stream.avail_out =
+            (uInt)pfile_in_zip_read_info->rest_read_uncompressed;
+
+    if ((len>pfile_in_zip_read_info->rest_read_compressed+
+           pfile_in_zip_read_info->stream.avail_in) &&
+         (pfile_in_zip_read_info->raw))
+        pfile_in_zip_read_info->stream.avail_out =
+            (uInt)pfile_in_zip_read_info->rest_read_compressed+
+            pfile_in_zip_read_info->stream.avail_in;
+
+    while (pfile_in_zip_read_info->stream.avail_out>0)
+    {
+        if ((pfile_in_zip_read_info->stream.avail_in==0) &&
+            (pfile_in_zip_read_info->rest_read_compressed>0))
+        {
+            uInt uReadThis = UNZ_BUFSIZE;
+            if (pfile_in_zip_read_info->rest_read_compressed<uReadThis)
+                uReadThis = (uInt)pfile_in_zip_read_info->rest_read_compressed;
+            if (uReadThis == 0)
+                return UNZ_EOF;
+            if (ZSEEK64(pfile_in_zip_read_info->z_filefunc,
+                      pfile_in_zip_read_info->filestream,
+                      pfile_in_zip_read_info->pos_in_zipfile +
+                         pfile_in_zip_read_info->byte_before_the_zipfile,
+                      ZLIB_FILEFUNC_SEEK_SET)!=0)
+                return UNZ_ERRNO;
+            if (ZREAD64(pfile_in_zip_read_info->z_filefunc,
+                      pfile_in_zip_read_info->filestream,
+                      pfile_in_zip_read_info->read_buffer,
+                      uReadThis)!=uReadThis)
+                return UNZ_ERRNO;
+
+
+#            ifndef NOUNCRYPT
+            if(s->encrypted)
+            {
+                uInt i;
+                for(i=0;i<uReadThis;i++)
+                  pfile_in_zip_read_info->read_buffer[i] =
+                      zdecode(s->keys,s->pcrc_32_tab,
+                              pfile_in_zip_read_info->read_buffer[i]);
+            }
+#            endif
+
+
+            pfile_in_zip_read_info->pos_in_zipfile += uReadThis;
+
+            pfile_in_zip_read_info->rest_read_compressed-=uReadThis;
+
+            pfile_in_zip_read_info->stream.next_in =
+                (Bytef*)pfile_in_zip_read_info->read_buffer;
+            pfile_in_zip_read_info->stream.avail_in = (uInt)uReadThis;
+        }
+
+        if ((pfile_in_zip_read_info->compression_method==0) || (pfile_in_zip_read_info->raw))
+        {
+            uInt uDoCopy,i ;
+
+            if ((pfile_in_zip_read_info->stream.avail_in == 0) &&
+                (pfile_in_zip_read_info->rest_read_compressed == 0))
+                return (iRead==0) ? UNZ_EOF : (int)iRead;
+
+            if (pfile_in_zip_read_info->stream.avail_out <
+                            pfile_in_zip_read_info->stream.avail_in)
+                uDoCopy = pfile_in_zip_read_info->stream.avail_out ;
+            else
+                uDoCopy = pfile_in_zip_read_info->stream.avail_in ;
+
+            for (i=0;i<uDoCopy;i++)
+                *(pfile_in_zip_read_info->stream.next_out+i) =
+                        *(pfile_in_zip_read_info->stream.next_in+i);
+
+            pfile_in_zip_read_info->total_out_64 = pfile_in_zip_read_info->total_out_64 + uDoCopy;
+
+            pfile_in_zip_read_info->crc32 = crc32(pfile_in_zip_read_info->crc32,
+                                pfile_in_zip_read_info->stream.next_out,
+                                uDoCopy);
+            pfile_in_zip_read_info->rest_read_uncompressed-=uDoCopy;
+            pfile_in_zip_read_info->stream.avail_in -= uDoCopy;
+            pfile_in_zip_read_info->stream.avail_out -= uDoCopy;
+            pfile_in_zip_read_info->stream.next_out += uDoCopy;
+            pfile_in_zip_read_info->stream.next_in += uDoCopy;
+            pfile_in_zip_read_info->stream.total_out += uDoCopy;
+            iRead += uDoCopy;
+        }
+        else if (pfile_in_zip_read_info->compression_method==Z_BZIP2ED)
+        {
+#ifdef HAVE_BZIP2
+            uLong uTotalOutBefore,uTotalOutAfter;
+            const Bytef *bufBefore;
+            uLong uOutThis;
+
+            pfile_in_zip_read_info->bstream.next_in        = (char*)pfile_in_zip_read_info->stream.next_in;
+            pfile_in_zip_read_info->bstream.avail_in       = pfile_in_zip_read_info->stream.avail_in;
+            pfile_in_zip_read_info->bstream.total_in_lo32  = pfile_in_zip_read_info->stream.total_in;
+            pfile_in_zip_read_info->bstream.total_in_hi32  = 0;
+            pfile_in_zip_read_info->bstream.next_out       = (char*)pfile_in_zip_read_info->stream.next_out;
+            pfile_in_zip_read_info->bstream.avail_out      = pfile_in_zip_read_info->stream.avail_out;
+            pfile_in_zip_read_info->bstream.total_out_lo32 = pfile_in_zip_read_info->stream.total_out;
+            pfile_in_zip_read_info->bstream.total_out_hi32 = 0;
+
+            uTotalOutBefore = pfile_in_zip_read_info->bstream.total_out_lo32;
+            bufBefore = (const Bytef *)pfile_in_zip_read_info->bstream.next_out;
+
+            err=BZ2_bzDecompress(&pfile_in_zip_read_info->bstream);
+
+            uTotalOutAfter = pfile_in_zip_read_info->bstream.total_out_lo32;
+            uOutThis = uTotalOutAfter-uTotalOutBefore;
+
+            pfile_in_zip_read_info->total_out_64 = pfile_in_zip_read_info->total_out_64 + uOutThis;
+
+            pfile_in_zip_read_info->crc32 = crc32(pfile_in_zip_read_info->crc32,bufBefore, (uInt)(uOutThis));
+            pfile_in_zip_read_info->rest_read_uncompressed -= uOutThis;
+            iRead += (uInt)(uTotalOutAfter - uTotalOutBefore);
+
+            pfile_in_zip_read_info->stream.next_in   = (Bytef*)pfile_in_zip_read_info->bstream.next_in;
+            pfile_in_zip_read_info->stream.avail_in  = pfile_in_zip_read_info->bstream.avail_in;
+            pfile_in_zip_read_info->stream.total_in  = pfile_in_zip_read_info->bstream.total_in_lo32;
+            pfile_in_zip_read_info->stream.next_out  = (Bytef*)pfile_in_zip_read_info->bstream.next_out;
+            pfile_in_zip_read_info->stream.avail_out = pfile_in_zip_read_info->bstream.avail_out;
+            pfile_in_zip_read_info->stream.total_out = pfile_in_zip_read_info->bstream.total_out_lo32;
+
+            if (err==BZ_STREAM_END)
+                return (iRead==0) ? UNZ_EOF : iRead;
+            if (err!=BZ_OK)
+                break;
+#endif
+        } // end Z_BZIP2ED
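+        /* Editor's note (comment added, not in the original source): the
+           deflate branch below calls inflate() with Z_SYNC_FLUSH and
+           measures progress by comparing z_stream.total_out before and
+           after the call; because total_out is a 32-bit uLong on some
+           platforms, the code widens the result into the 64-bit
+           total_out_64 counter and compensates for a possible 32-bit
+           wrap-around. */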
+        else
+        {
+            ZPOS64_T uTotalOutBefore,uTotalOutAfter;
+            const Bytef *bufBefore;
+            ZPOS64_T uOutThis;
+            int flush=Z_SYNC_FLUSH;
+
+            uTotalOutBefore = pfile_in_zip_read_info->stream.total_out;
+            bufBefore = pfile_in_zip_read_info->stream.next_out;
+
+            /*
+            if ((pfile_in_zip_read_info->rest_read_uncompressed ==
+                     pfile_in_zip_read_info->stream.avail_out) &&
+                (pfile_in_zip_read_info->rest_read_compressed == 0))
+                flush = Z_FINISH;
+            */
+            err=inflate(&pfile_in_zip_read_info->stream,flush);
+
+            if ((err>=0) && (pfile_in_zip_read_info->stream.msg!=NULL))
+                err = Z_DATA_ERROR;
+
+            uTotalOutAfter = pfile_in_zip_read_info->stream.total_out;
+            /* Detect overflow, because z_stream.total_out is uLong (32 bits) */
+            if (uTotalOutAfter<uTotalOutBefore)
+                uTotalOutAfter += 1LL << 32; /* Wrap value */
+
+            uOutThis = uTotalOutAfter-uTotalOutBefore;
+
+            pfile_in_zip_read_info->total_out_64 = pfile_in_zip_read_info->total_out_64 + uOutThis;
+
+            pfile_in_zip_read_info->crc32 =
+                crc32(pfile_in_zip_read_info->crc32,bufBefore,
+                        (uInt)(uOutThis));
+
+            pfile_in_zip_read_info->rest_read_uncompressed -=
+                uOutThis;
+
+            iRead += (uInt)(uTotalOutAfter - uTotalOutBefore);
+
+            if (err==Z_STREAM_END)
+                return (iRead==0) ? UNZ_EOF : (int)iRead;
+            if (err!=Z_OK)
+                break;
+        }
+    }
+
+    if (err==Z_OK)
+        return (int)iRead;
+    return err;
+}
+
+
+/*
+  Give the current position in uncompressed data
+*/
+extern z_off_t ZEXPORT unztell(unzFile file) {
+    unz64_s* s;
+    file_in_zip64_read_info_s* pfile_in_zip_read_info;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_PARAMERROR;
+
+    return (z_off_t)pfile_in_zip_read_info->stream.total_out;
+}
+
+extern ZPOS64_T ZEXPORT unztell64(unzFile file) {
+
+    unz64_s* s;
+    file_in_zip64_read_info_s* pfile_in_zip_read_info;
+    if (file==NULL)
+        return (ZPOS64_T)-1;
+    s=(unz64_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return (ZPOS64_T)-1;
+
+    return pfile_in_zip_read_info->total_out_64;
+}
+
+
+/*
+  return 1 if the end of file was reached, 0 otherwise
+*/
+extern int ZEXPORT unzeof(unzFile file) {
+    unz64_s* s;
+    file_in_zip64_read_info_s* pfile_in_zip_read_info;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_PARAMERROR;
+
+    if (pfile_in_zip_read_info->rest_read_uncompressed == 0)
+        return 1;
+    else
+        return 0;
+}
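+/* Editor's note: a minimal sketch (not part of the original minizip source)
+   showing how unztell64() and unzeof() above can drive a progress report
+   while an entry is being extracted; 'uf' is assumed to be an unzFile with
+   the current entry already opened.  The UNZ_PROGRESS_DEMO guard is
+   hypothetical. */
+#ifdef UNZ_PROGRESS_DEMO
+#include <stdio.h>
+static void report_progress(unzFile uf, ZPOS64_T total_uncompressed) {
+    /* unztell64 returns the number of uncompressed bytes produced so far */
+    ZPOS64_T done = unztell64(uf);
+    if (total_uncompressed > 0)
+        printf("\r%3u%%", (unsigned)(done * 100 / total_uncompressed));
+    if (unzeof(uf) == 1)   /* all uncompressed data has been delivered */
+        printf("\rdone\n");
+}
+#endif /* UNZ_PROGRESS_DEMO */
+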
+
+
+/*
+  Read extra field from the current file (opened by unzOpenCurrentFile)
+  This is the local-header version of the extra field (sometimes, there is
+    more info in the local-header version than in the central-header)
+
+  if buf==NULL, it returns the size of the local extra field that can be read
+
+  if buf!=NULL, len is the size of the buffer, and the extra header is copied in
+    buf.
+  the return value is the number of bytes copied in buf, or (if <0)
+    the error code
+*/
+extern int ZEXPORT unzGetLocalExtrafield(unzFile file, voidp buf, unsigned len) {
+    unz64_s* s;
+    file_in_zip64_read_info_s* pfile_in_zip_read_info;
+    uInt read_now;
+    ZPOS64_T size_to_read;
+
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_PARAMERROR;
+
+    size_to_read = (pfile_in_zip_read_info->size_local_extrafield -
+                pfile_in_zip_read_info->pos_local_extrafield);
+
+    if (buf==NULL)
+        return (int)size_to_read;
+
+    if (len>size_to_read)
+        read_now = (uInt)size_to_read;
+    else
+        read_now = (uInt)len ;
+
+    if (read_now==0)
+        return 0;
+
+    if (ZSEEK64(pfile_in_zip_read_info->z_filefunc,
+              pfile_in_zip_read_info->filestream,
+              pfile_in_zip_read_info->offset_local_extrafield +
+              pfile_in_zip_read_info->pos_local_extrafield,
+              ZLIB_FILEFUNC_SEEK_SET)!=0)
+        return UNZ_ERRNO;
+
+    if (ZREAD64(pfile_in_zip_read_info->z_filefunc,
+              pfile_in_zip_read_info->filestream,
+              buf,read_now)!=read_now)
+        return UNZ_ERRNO;
+
+    return (int)read_now;
+}
+
+/*
+  Close the file in zip opened with unzOpenCurrentFile
+  Return UNZ_CRCERROR if the whole file was read but the CRC does not match
+*/
+extern int ZEXPORT unzCloseCurrentFile(unzFile file) {
+    int err=UNZ_OK;
+
+    unz64_s* s;
+    file_in_zip64_read_info_s* pfile_in_zip_read_info;
+    if (file==NULL)
+        return UNZ_PARAMERROR;
+    s=(unz64_s*)file;
+    pfile_in_zip_read_info=s->pfile_in_zip_read;
+
+    if (pfile_in_zip_read_info==NULL)
+        return UNZ_PARAMERROR;
+
+
+    if ((pfile_in_zip_read_info->rest_read_uncompressed == 0) &&
+        (!pfile_in_zip_read_info->raw))
+    {
+        if (pfile_in_zip_read_info->crc32 != pfile_in_zip_read_info->crc32_wait)
+            err=UNZ_CRCERROR;
+    }
+
+
+    free(pfile_in_zip_read_info->read_buffer);
+    pfile_in_zip_read_info->read_buffer = NULL;
+    if (pfile_in_zip_read_info->stream_initialised == Z_DEFLATED)
+        inflateEnd(&pfile_in_zip_read_info->stream);
+#ifdef HAVE_BZIP2
+    else if (pfile_in_zip_read_info->stream_initialised == Z_BZIP2ED)
+        BZ2_bzDecompressEnd(&pfile_in_zip_read_info->bstream);
+#endif
+
+
+    pfile_in_zip_read_info->stream_initialised = 0;
+    free(pfile_in_zip_read_info);
+
+    s->pfile_in_zip_read=NULL;
+
+    return err;
+}
+
+
+/*
+  Get the global comment string of the ZipFile, in the szComment buffer.
+  uSizeBuf is the size of the szComment buffer.
+ return the number of byte copied or an error code <0 +*/ +extern int ZEXPORT unzGetGlobalComment(unzFile file, char * szComment, uLong uSizeBuf) { + unz64_s* s; + uLong uReadThis ; + if (file==NULL) + return (int)UNZ_PARAMERROR; + s=(unz64_s*)file; + + uReadThis = uSizeBuf; + if (uReadThis>s->gi.size_comment) + uReadThis = s->gi.size_comment; + + if (ZSEEK64(s->z_filefunc,s->filestream,s->central_pos+22,ZLIB_FILEFUNC_SEEK_SET)!=0) + return UNZ_ERRNO; + + if (uReadThis>0) + { + *szComment='\0'; + if (ZREAD64(s->z_filefunc,s->filestream,szComment,uReadThis)!=uReadThis) + return UNZ_ERRNO; + } + + if ((szComment != NULL) && (uSizeBuf > s->gi.size_comment)) + *(szComment+s->gi.size_comment)='\0'; + return (int)uReadThis; +} + +/* Additions by RX '2004 */ +extern ZPOS64_T ZEXPORT unzGetOffset64(unzFile file) { + unz64_s* s; + + if (file==NULL) + return 0; //UNZ_PARAMERROR; + s=(unz64_s*)file; + if (!s->current_file_ok) + return 0; + if (s->gi.number_entry != 0 && s->gi.number_entry != 0xffff) + if (s->num_file==s->gi.number_entry) + return 0; + return s->pos_in_central_dir; +} + +extern uLong ZEXPORT unzGetOffset(unzFile file) { + ZPOS64_T offset64; + + if (file==NULL) + return 0; //UNZ_PARAMERROR; + offset64 = unzGetOffset64(file); + return (uLong)offset64; +} + +extern int ZEXPORT unzSetOffset64(unzFile file, ZPOS64_T pos) { + unz64_s* s; + int err; + + if (file==NULL) + return UNZ_PARAMERROR; + s=(unz64_s*)file; + + s->pos_in_central_dir = pos; + s->num_file = s->gi.number_entry; /* hack */ + err = unz64local_GetCurrentFileInfoInternal(file,&s->cur_file_info, + &s->cur_file_info_internal, + NULL,0,NULL,0,NULL,0); + s->current_file_ok = (err == UNZ_OK); + return err; +} + +extern int ZEXPORT unzSetOffset (unzFile file, uLong pos) { + return unzSetOffset64(file,pos); +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.h new file mode 100644 index 0000000..5cfc9c6 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/unzip.h @@ -0,0 +1,437 @@ +/* unzip.h -- IO for uncompress .zip files using zlib + Version 1.1, February 14h, 2010 + part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) + + Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) + + Modifications of Unzip for Zip64 + Copyright (C) 2007-2008 Even Rouault + + Modifications for Zip64 support on both zip and unzip + Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) + + For more info read MiniZip_info.txt + + --------------------------------------------------------------------------------- + + Condition of use and distribution are the same than zlib : + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. 
This notice may not be removed or altered from any source distribution. + + --------------------------------------------------------------------------------- + + Changes + + See header of unzip64.c + +*/ + +#ifndef _unz64_H +#define _unz64_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#ifndef _ZLIBIOAPI_H +#include "ioapi.h" +#endif + +#ifdef HAVE_BZIP2 +#include "bzlib.h" +#endif + +#define Z_BZIP2ED 12 + +#if defined(STRICTUNZIP) || defined(STRICTZIPUNZIP) +/* like the STRICT of WIN32, we define a pointer that cannot be converted + from (void*) without cast */ +typedef struct TagunzFile__ { int unused; } unzFile__; +typedef unzFile__ *unzFile; +#else +typedef voidp unzFile; +#endif + + +#define UNZ_OK (0) +#define UNZ_END_OF_LIST_OF_FILE (-100) +#define UNZ_ERRNO (Z_ERRNO) +#define UNZ_EOF (0) +#define UNZ_PARAMERROR (-102) +#define UNZ_BADZIPFILE (-103) +#define UNZ_INTERNALERROR (-104) +#define UNZ_CRCERROR (-105) + +/* tm_unz contain date/time info */ +typedef struct tm_unz_s +{ + int tm_sec; /* seconds after the minute - [0,59] */ + int tm_min; /* minutes after the hour - [0,59] */ + int tm_hour; /* hours since midnight - [0,23] */ + int tm_mday; /* day of the month - [1,31] */ + int tm_mon; /* months since January - [0,11] */ + int tm_year; /* years - [1980..2044] */ +} tm_unz; + +/* unz_global_info structure contain global data about the ZIPfile + These data comes from the end of central dir */ +typedef struct unz_global_info64_s +{ + ZPOS64_T number_entry; /* total number of entries in + the central dir on this disk */ + uLong size_comment; /* size of the global comment of the zipfile */ +} unz_global_info64; + +typedef struct unz_global_info_s +{ + uLong number_entry; /* total number of entries in + the central dir on this disk */ + uLong size_comment; /* size of the global comment of the zipfile */ +} unz_global_info; + +/* unz_file_info contain information about a file in the zipfile */ +typedef struct unz_file_info64_s +{ + uLong version; /* version made by 2 bytes */ + uLong version_needed; /* version needed to extract 2 bytes */ + uLong flag; /* general purpose bit flag 2 bytes */ + uLong compression_method; /* compression method 2 bytes */ + uLong dosDate; /* last mod file date in Dos fmt 4 bytes */ + uLong crc; /* crc-32 4 bytes */ + ZPOS64_T compressed_size; /* compressed size 8 bytes */ + ZPOS64_T uncompressed_size; /* uncompressed size 8 bytes */ + uLong size_filename; /* filename length 2 bytes */ + uLong size_file_extra; /* extra field length 2 bytes */ + uLong size_file_comment; /* file comment length 2 bytes */ + + uLong disk_num_start; /* disk number start 2 bytes */ + uLong internal_fa; /* internal file attributes 2 bytes */ + uLong external_fa; /* external file attributes 4 bytes */ + + tm_unz tmu_date; +} unz_file_info64; + +typedef struct unz_file_info_s +{ + uLong version; /* version made by 2 bytes */ + uLong version_needed; /* version needed to extract 2 bytes */ + uLong flag; /* general purpose bit flag 2 bytes */ + uLong compression_method; /* compression method 2 bytes */ + uLong dosDate; /* last mod file date in Dos fmt 4 bytes */ + uLong crc; /* crc-32 4 bytes */ + uLong compressed_size; /* compressed size 4 bytes */ + uLong uncompressed_size; /* uncompressed size 4 bytes */ + uLong size_filename; /* filename length 2 bytes */ + uLong size_file_extra; /* extra field length 2 bytes */ + uLong size_file_comment; /* file comment length 2 bytes */ + + uLong disk_num_start; /* disk number start 2 bytes */ + 
uLong internal_fa; /* internal file attributes 2 bytes */ + uLong external_fa; /* external file attributes 4 bytes */ + + tm_unz tmu_date; +} unz_file_info; + +extern int ZEXPORT unzStringFileNameCompare(const char* fileName1, + const char* fileName2, + int iCaseSensitivity); +/* + Compare two filenames (fileName1,fileName2). + If iCaseSensitivity = 1, comparison is case sensitive (like strcmp) + If iCaseSensitivity = 2, comparison is not case sensitive (like strcmpi + or strcasecmp) + If iCaseSensitivity = 0, case sensitivity is default of your operating system + (like 1 on Unix, 2 on Windows) +*/ + + +extern unzFile ZEXPORT unzOpen(const char *path); +extern unzFile ZEXPORT unzOpen64(const void *path); +/* + Open a Zip file. path contain the full pathname (by example, + on a Windows XP computer "c:\\zlib\\zlib113.zip" or on an Unix computer + "zlib/zlib113.zip". + If the zipfile cannot be opened (file don't exist or in not valid), the + return value is NULL. + Else, the return value is a unzFile Handle, usable with other function + of this unzip package. + the "64" function take a const void* pointer, because the path is just the + value passed to the open64_file_func callback. + Under Windows, if UNICODE is defined, using fill_fopen64_filefunc, the path + is a pointer to a wide unicode string (LPCTSTR is LPCWSTR), so const char* + does not describe the reality +*/ + + +extern unzFile ZEXPORT unzOpen2(const char *path, + zlib_filefunc_def* pzlib_filefunc_def); +/* + Open a Zip file, like unzOpen, but provide a set of file low level API + for read/write the zip file (see ioapi.h) +*/ + +extern unzFile ZEXPORT unzOpen2_64(const void *path, + zlib_filefunc64_def* pzlib_filefunc_def); +/* + Open a Zip file, like unz64Open, but provide a set of file low level API + for read/write the zip file (see ioapi.h) +*/ + +extern int ZEXPORT unzClose(unzFile file); +/* + Close a ZipFile opened with unzOpen. + If there is files inside the .Zip opened with unzOpenCurrentFile (see later), + these files MUST be closed with unzCloseCurrentFile before call unzClose. + return UNZ_OK if there is no problem. */ + +extern int ZEXPORT unzGetGlobalInfo(unzFile file, + unz_global_info *pglobal_info); + +extern int ZEXPORT unzGetGlobalInfo64(unzFile file, + unz_global_info64 *pglobal_info); +/* + Write info about the ZipFile in the *pglobal_info structure. + No preparation of the structure is needed + return UNZ_OK if there is no problem. */ + + +extern int ZEXPORT unzGetGlobalComment(unzFile file, + char *szComment, + uLong uSizeBuf); +/* + Get the global comment string of the ZipFile, in the szComment buffer. + uSizeBuf is the size of the szComment buffer. + return the number of byte copied or an error code <0 +*/ + + +/***************************************************************************/ +/* Unzip package allow you browse the directory of the zipfile */ + +extern int ZEXPORT unzGoToFirstFile(unzFile file); +/* + Set the current file of the zipfile to the first file. + return UNZ_OK if there is no problem +*/ + +extern int ZEXPORT unzGoToNextFile(unzFile file); +/* + Set the current file of the zipfile to the next file. + return UNZ_OK if there is no problem + return UNZ_END_OF_LIST_OF_FILE if the actual file was the latest. +*/ + +extern int ZEXPORT unzLocateFile(unzFile file, + const char *szFileName, + int iCaseSensitivity); +/* + Try locate the file szFileName in the zipfile. + For the iCaseSensitivity signification, see unzStringFileNameCompare + + return value : + UNZ_OK if the file is found. 
It becomes the current file. + UNZ_END_OF_LIST_OF_FILE if the file is not found +*/ + + +/* ****************************************** */ +/* Ryan supplied functions */ +/* unz_file_info contain information about a file in the zipfile */ +typedef struct unz_file_pos_s +{ + uLong pos_in_zip_directory; /* offset in zip file directory */ + uLong num_of_file; /* # of file */ +} unz_file_pos; + +extern int ZEXPORT unzGetFilePos( + unzFile file, + unz_file_pos* file_pos); + +extern int ZEXPORT unzGoToFilePos( + unzFile file, + unz_file_pos* file_pos); + +typedef struct unz64_file_pos_s +{ + ZPOS64_T pos_in_zip_directory; /* offset in zip file directory */ + ZPOS64_T num_of_file; /* # of file */ +} unz64_file_pos; + +extern int ZEXPORT unzGetFilePos64( + unzFile file, + unz64_file_pos* file_pos); + +extern int ZEXPORT unzGoToFilePos64( + unzFile file, + const unz64_file_pos* file_pos); + +/* ****************************************** */ + +extern int ZEXPORT unzGetCurrentFileInfo64(unzFile file, + unz_file_info64 *pfile_info, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize); + +extern int ZEXPORT unzGetCurrentFileInfo(unzFile file, + unz_file_info *pfile_info, + char *szFileName, + uLong fileNameBufferSize, + void *extraField, + uLong extraFieldBufferSize, + char *szComment, + uLong commentBufferSize); +/* + Get Info about the current file + if pfile_info!=NULL, the *pfile_info structure will contain some info about + the current file + if szFileName!=NULL, the filename string will be copied in szFileName + (fileNameBufferSize is the size of the buffer) + if extraField!=NULL, the extra field information will be copied in extraField + (extraFieldBufferSize is the size of the buffer). + This is the Central-header version of the extra field + if szComment!=NULL, the comment string of the file will be copied in szComment + (commentBufferSize is the size of the buffer) +*/ + + +/** Addition for GDAL : START */ + +extern ZPOS64_T ZEXPORT unzGetCurrentFileZStreamPos64(unzFile file); + +/** Addition for GDAL : END */ + + +/***************************************************************************/ +/* for reading the content of the current zipfile, you can open it, read data + from it, and close it (you can close it before reading all the file) + */ + +extern int ZEXPORT unzOpenCurrentFile(unzFile file); +/* + Open for reading data the current file in the zipfile. + If there is no error, the return value is UNZ_OK. +*/ + +extern int ZEXPORT unzOpenCurrentFilePassword(unzFile file, + const char* password); +/* + Open for reading data the current file in the zipfile. + password is a crypting password + If there is no error, the return value is UNZ_OK. 
+*/
+
+extern int ZEXPORT unzOpenCurrentFile2(unzFile file,
+                                       int* method,
+                                       int* level,
+                                       int raw);
+/*
+  Same as unzOpenCurrentFile, but open for reading the raw (not uncompressed)
+    data if raw==1
+  *method will receive the method of compression, *level will receive the
+    level of compression
+  note : you can set the level parameter as NULL (if you do not want to know
+    the level), but you CANNOT set the method parameter as NULL
+*/
+
+extern int ZEXPORT unzOpenCurrentFile3(unzFile file,
+                                       int* method,
+                                       int* level,
+                                       int raw,
+                                       const char* password);
+/*
+  Same as unzOpenCurrentFile, but open for reading the raw (not uncompressed)
+    data if raw==1
+  *method will receive the method of compression, *level will receive the
+    level of compression
+  note : you can set the level parameter as NULL (if you do not want to know
+    the level), but you CANNOT set the method parameter as NULL
+*/
+
+
+extern int ZEXPORT unzCloseCurrentFile(unzFile file);
+/*
+  Close the file in zip opened with unzOpenCurrentFile
+  Return UNZ_CRCERROR if the whole file was read but the CRC does not match
+*/
+
+extern int ZEXPORT unzReadCurrentFile(unzFile file,
+                                      voidp buf,
+                                      unsigned len);
+/*
+  Read bytes from the current file (opened by unzOpenCurrentFile)
+  buf contains the buffer where the data must be copied
+  len is the size of buf.
+
+  return the number of bytes copied if some bytes were copied
+  return 0 if the end of file was reached
+  return <0 with an error code if there is an error
+    (UNZ_ERRNO for an IO error, or a zlib error code for an uncompress error)
+*/
+
+extern z_off_t ZEXPORT unztell(unzFile file);
+
+extern ZPOS64_T ZEXPORT unztell64(unzFile file);
+/*
+  Give the current position in uncompressed data
+*/
+
+extern int ZEXPORT unzeof(unzFile file);
+/*
+  return 1 if the end of file was reached, 0 otherwise
+*/
+
+extern int ZEXPORT unzGetLocalExtrafield(unzFile file,
+                                         voidp buf,
+                                         unsigned len);
+/*
+  Read extra field from the current file (opened by unzOpenCurrentFile)
+  This is the local-header version of the extra field (sometimes, there is
+    more info in the local-header version than in the central-header)
+
+  if buf==NULL, it returns the size of the local extra field
+
+  if buf!=NULL, len is the size of the buffer, and the extra header is copied in
+    buf.
+  the return value is the number of bytes copied in buf, or (if <0)
+    the error code
+*/
+
+/***************************************************************************/
+
+/* Get the current file offset */
+extern ZPOS64_T ZEXPORT unzGetOffset64 (unzFile file);
+extern uLong ZEXPORT unzGetOffset (unzFile file);
+
+/* Set the current file offset */
+extern int ZEXPORT unzSetOffset64 (unzFile file, ZPOS64_T pos);
+extern int ZEXPORT unzSetOffset (unzFile file, uLong pos);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _unz64_H */
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.c
new file mode 100644
index 0000000..60bdffa
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.c
@@ -0,0 +1,1956 @@
+/* zip.c -- IO on .zip files using zlib
+   Version 1.1, February 14th, 2010
+   part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html )
+
+         Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html )
+
+         Modifications for Zip64 support
+         Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com )
+
+         For more info read MiniZip_info.txt
+
+         Changes
+   Oct-2009 - Mathias Svensson - Remove old C style function prototypes
+   Oct-2009 - Mathias Svensson - Added Zip64 Support when creating new file archives
+   Oct-2009 - Mathias Svensson - Did some code cleanup and refactoring to get better overview of some functions.
+   Oct-2009 - Mathias Svensson - Added zipRemoveExtraInfoBlock to strip extra field data from its ZIP64 data
+                                 It is used when recreating zip archive with RAW when deleting items from a zip.
+                                 ZIP64 data is automatically added to items that needs it, and existing ZIP64 data need to be removed.
+   Oct-2009 - Mathias Svensson - Added support for BZIP2 as compression mode (bzip2 lib is required)
+   Jan-2010 - back to unzip and minizip 1.0 name scheme, with compatibility layer
+
+*/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "zlib.h"
+#include "zip.h"
+
+#ifdef STDC
+#  include <stddef.h>
+#endif
+#ifdef NO_ERRNO_H
+    extern int errno;
+#else
+#   include <errno.h>
+#endif
+
+
+#ifndef local
+#  define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+#ifndef VERSIONMADEBY
+# define VERSIONMADEBY   (0x0) /* platform dependent */
+#endif
+
+#ifndef Z_BUFSIZE
+#define Z_BUFSIZE (64*1024) //(16384)
+#endif
+
+#ifndef Z_MAXFILENAMEINZIP
+#define Z_MAXFILENAMEINZIP (256)
+#endif
+
+#ifndef ALLOC
+# define ALLOC(size) (malloc(size))
+#endif
+
+/*
+#define SIZECENTRALDIRITEM (0x2e)
+#define SIZEZIPLOCALHEADER (0x1e)
+*/
+
+/* I've found an old Unix (a SunOS 4.1.3_U1) without all SEEK_* defined.... */
+
+
+// Not sure that this works on all platforms
+#define MAKEULONG64(a, b) ((ZPOS64_T)(((unsigned long)(a)) | ((ZPOS64_T)((unsigned long)(b))) << 32))
+
+#ifndef SEEK_CUR
+#define SEEK_CUR    1
+#endif
+
+#ifndef SEEK_END
+#define SEEK_END    2
+#endif
+
+#ifndef SEEK_SET
+#define SEEK_SET    0
+#endif
+
+#ifndef DEF_MEM_LEVEL
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+#endif
+const char zip_copyright[] =" zip 1.01 Copyright 1998-2004 Gilles Vollant - http://www.winimage.com/zLibDll";
+
+
+#define SIZEDATA_INDATABLOCK (4096-(4*4))
+
+#define LOCALHEADERMAGIC    (0x04034b50)
+#define CENTRALHEADERMAGIC  (0x02014b50)
+#define ENDHEADERMAGIC      (0x06054b50)
+#define ZIP64ENDHEADERMAGIC      (0x6064b50)
+#define ZIP64ENDLOCHEADERMAGIC   (0x7064b50)
+
+#define FLAG_LOCALHEADER_OFFSET (0x06)
+#define CRC_LOCALHEADER_OFFSET  (0x0e)
+
+#define SIZECENTRALHEADER (0x2e) /* 46 */
+
+typedef struct linkedlist_datablock_internal_s
+{
+  struct linkedlist_datablock_internal_s* next_datablock;
+  uLong  avail_in_this_block;
+  uLong  filled_in_this_block;
+  uLong  unused; /* for future use and alignment */
+  unsigned char data[SIZEDATA_INDATABLOCK];
+} linkedlist_datablock_internal;
+
+typedef struct linkedlist_data_s
+{
+    linkedlist_datablock_internal* first_block;
+    linkedlist_datablock_internal* last_block;
+} linkedlist_data;
+
+
+typedef struct
+{
+    z_stream stream;            /* zLib stream structure for inflate */
+#ifdef HAVE_BZIP2
+    bz_stream bstream;          /* bzLib stream structure for bziped */
+#endif
+
+    int  stream_initialised;    /* 1 if the stream is initialised */
+    uInt pos_in_buffered_data;  /* last written byte in buffered_data */
+
+    ZPOS64_T pos_local_header;  /* offset of the local header of the file
+                                   currently writing */
+    char* central_header;       /* central header data for the current file */
+    uLong size_centralExtra;
+    uLong size_centralheader;   /* size of the central header for cur file */
+    uLong size_centralExtraFree; /* Extra bytes allocated to the centralheader but that are not used */
+    uLong flag;                 /* flag of the file currently writing */
+
+    int  method;                /* compression method of file currently wr.*/
+    int  raw;                   /* 1 for directly writing raw data */
+    Byte buffered_data[Z_BUFSIZE];/* buffer contain compressed data to be writ*/
+    uLong dosDate;
+    uLong crc32;
+    int  encrypt;
+    int  zip64;                 /* Add ZIP64 extended information in the extra field */
+    ZPOS64_T pos_zip64extrainfo;
+    ZPOS64_T totalCompressedData;
+    ZPOS64_T totalUncompressedData;
+#ifndef NOCRYPT
+    unsigned long keys[3];      /* keys defining the pseudo-random sequence */
+    const z_crc_t* pcrc_32_tab;
+    unsigned crypt_header_size;
+#endif
+} curfile64_info;
+
+typedef struct
+{
+    zlib_filefunc64_32_def z_filefunc;
+    voidpf filestream;          /* io structure of the zipfile */
+    linkedlist_data central_dir;/* datablock with central dir in construction*/
+    int  in_opened_file_inzip;  /* 1 if a file in the zip is currently writ.*/
+    curfile64_info ci;          /* info on the file currently writing */
+
+    ZPOS64_T begin_pos;         /* position of the beginning of the zipfile */
+    ZPOS64_T add_position_when_writing_offset;
+    ZPOS64_T number_entry;
+
+#ifndef NO_ADDFILEINEXISTINGZIP
+    char *globalcomment;
+#endif
+
+} zip64_internal;
+
+
+#ifndef NOCRYPT
+#define INCLUDECRYPTINGCODE_IFCRYPTALLOWED
+#include "crypt.h"
+#endif
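+/* Editor's note: a minimal standalone sketch (not part of the original
+   minizip source) of the grow-by-datablock pattern used by the
+   linkedlist_data machinery defined below: the central directory is
+   accumulated in fixed-size blocks, so the whole directory never needs to
+   be reallocated and copied.  The ZIP_DATABLOCK_DEMO guard and the payload
+   bytes are hypothetical, for illustration only. */
+#ifdef ZIP_DATABLOCK_DEMO
+#include <stdio.h>
+/* forward declarations of the local helpers defined just below */
+local void init_linkedlist(linkedlist_data* ll);
+local void free_linkedlist(linkedlist_data* ll);
+local int  add_data_in_datablock(linkedlist_data* ll, const void* buf, uLong len);
+int main(void) {
+    linkedlist_data ll;
+    const char payload[] = "PK\x01\x02";   /* hypothetical bytes to buffer */
+    init_linkedlist(&ll);
+    if (add_data_in_datablock(&ll, payload, (uLong)sizeof(payload)) == ZIP_OK)
+        printf("buffered %u bytes in the first %d-byte block\n",
+               (unsigned)ll.first_block->filled_in_this_block,
+               SIZEDATA_INDATABLOCK);
+    free_linkedlist(&ll);
+    return 0;
+}
+#endif /* ZIP_DATABLOCK_DEMO */
+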
ldi->next_datablock = NULL ;
+        ldi->filled_in_this_block = 0 ;
+        ldi->avail_in_this_block = SIZEDATA_INDATABLOCK ;
+    }
+    return ldi;
+}
+
+local void free_datablock(linkedlist_datablock_internal* ldi) {
+    while (ldi!=NULL)
+    {
+        linkedlist_datablock_internal* ldinext = ldi->next_datablock;
+        free(ldi);
+        ldi = ldinext;
+    }
+}
+
+local void init_linkedlist(linkedlist_data* ll) {
+    ll->first_block = ll->last_block = NULL;
+}
+
+local void free_linkedlist(linkedlist_data* ll) {
+    free_datablock(ll->first_block);
+    ll->first_block = ll->last_block = NULL;
+}
+
+
+local int add_data_in_datablock(linkedlist_data* ll, const void* buf, uLong len) {
+    linkedlist_datablock_internal* ldi;
+    const unsigned char* from_copy;
+
+    if (ll==NULL)
+        return ZIP_INTERNALERROR;
+
+    if (ll->last_block == NULL)
+    {
+        ll->first_block = ll->last_block = allocate_new_datablock();
+        if (ll->first_block == NULL)
+            return ZIP_INTERNALERROR;
+    }
+
+    ldi = ll->last_block;
+    from_copy = (const unsigned char*)buf;
+
+    while (len>0)
+    {
+        uInt copy_this;
+        uInt i;
+        unsigned char* to_copy;
+
+        if (ldi->avail_in_this_block==0)
+        {
+            ldi->next_datablock = allocate_new_datablock();
+            if (ldi->next_datablock == NULL)
+                return ZIP_INTERNALERROR;
+            ldi = ldi->next_datablock ;
+            ll->last_block = ldi;
+        }
+
+        if (ldi->avail_in_this_block < len)
+            copy_this = (uInt)ldi->avail_in_this_block;
+        else
+            copy_this = (uInt)len;
+
+        to_copy = &(ldi->data[ldi->filled_in_this_block]);
+
+        for (i=0;i<copy_this;i++)
+            *(to_copy+i) = *(from_copy+i);
+
+        ldi->filled_in_this_block += copy_this;
+        ldi->avail_in_this_block -= copy_this;
+        from_copy += copy_this ;
+        len -= copy_this;
+    }
+    return ZIP_OK;
+}
+
+
+
+/****************************************************************************/
+
+#ifndef NO_ADDFILEINEXISTINGZIP
+/* ===========================================================================
+   Inputs a long in LSB order to the given file
+   nbByte == 1, 2 ,4 or 8 (byte, short or long, ZPOS64_T)
+*/
+
+local int zip64local_putValue(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream, ZPOS64_T x, int nbByte) {
+    unsigned char buf[8];
+    int n;
+    for (n = 0; n < nbByte; n++)
+    {
+        buf[n] = (unsigned char)(x & 0xff);
+        x >>= 8;
+    }
+    if (x != 0)
+    {     /* data overflow - hack for ZIP64 (X Roche) */
+        for (n = 0; n < nbByte; n++)
+        {
+            buf[n] = 0xff;
+        }
+    }
+
+    if (ZWRITE64(*pzlib_filefunc_def,filestream,buf,(uLong)nbByte)!=(uLong)nbByte)
+        return ZIP_ERRNO;
+    else
+        return ZIP_OK;
+}
+
+local void zip64local_putValue_inmemory (void* dest, ZPOS64_T x, int nbByte) {
+    unsigned char* buf=(unsigned char*)dest;
+    int n;
+    for (n = 0; n < nbByte; n++) {
+        buf[n] = (unsigned char)(x & 0xff);
+        x >>= 8;
+    }
+
+    if (x != 0)
+    {    /* data overflow - hack for ZIP64 */
+       for (n = 0; n < nbByte; n++)
+       {
+          buf[n] = 0xff;
+       }
+    }
+}
+
+/****************************************************************************/
+
+
+local uLong zip64local_TmzDateToDosDate(const tm_zip* ptm) {
+    uLong year = (uLong)ptm->tm_year;
+    if (year>=1980)
+        year-=1980;
+    else if (year>=80)
+        year-=80;
+    return
+      (uLong) (((uLong)(ptm->tm_mday) + (32 * (uLong)(ptm->tm_mon+1)) + (512 * year)) << 16) |
+        (((uLong)ptm->tm_sec/2) + (32 * (uLong)ptm->tm_min) + (2048 * (uLong)ptm->tm_hour));
+}
+
+
+/****************************************************************************/
+
+local int zip64local_getByte(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream, int* pi) {
+    unsigned char c;
+    int err = (int)ZREAD64(*pzlib_filefunc_def,filestream,&c,1);
+    if (err==1)
+    {
+        *pi = (int)c;
+        return ZIP_OK;
+    }
+    else
+    {
+
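+        /* Short read: the checks below distinguish a genuine stream error
+           (ZIP_ERRNO) from a plain end-of-file (ZIP_EOF). */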
+        if (ZERROR64(*pzlib_filefunc_def,filestream))
+            return ZIP_ERRNO;
+        else
+            return ZIP_EOF;
+    }
+}
+
+
+/* ===========================================================================
+   Reads a long in LSB order from the given gz_stream. Sets
+*/
+local int zip64local_getShort(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream, uLong* pX) {
+    uLong x ;
+    int i = 0;
+    int err;
+
+    err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x = (uLong)i;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((uLong)i)<<8;
+
+    if (err==ZIP_OK)
+        *pX = x;
+    else
+        *pX = 0;
+    return err;
+}
+
+local int zip64local_getLong(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream, uLong* pX) {
+    uLong x ;
+    int i = 0;
+    int err;
+
+    err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x = (uLong)i;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((uLong)i)<<8;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((uLong)i)<<16;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((uLong)i)<<24;
+
+    if (err==ZIP_OK)
+        *pX = x;
+    else
+        *pX = 0;
+    return err;
+}
+
+
+local int zip64local_getLong64(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream, ZPOS64_T *pX) {
+    ZPOS64_T x;
+    int i = 0;
+    int err;
+
+    err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x = (ZPOS64_T)i;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((ZPOS64_T)i)<<8;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((ZPOS64_T)i)<<16;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((ZPOS64_T)i)<<24;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((ZPOS64_T)i)<<32;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((ZPOS64_T)i)<<40;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((ZPOS64_T)i)<<48;
+
+    if (err==ZIP_OK)
+        err = zip64local_getByte(pzlib_filefunc_def,filestream,&i);
+    x += ((ZPOS64_T)i)<<56;
+
+    if (err==ZIP_OK)
+        *pX = x;
+    else
+        *pX = 0;
+
+    return err;
+}
+
+#ifndef BUFREADCOMMENT
+#define BUFREADCOMMENT (0x400)
+#endif
+/*
+  Locate the Central directory of a zipfile (at the end, just before
+    the global comment)
+*/
+local ZPOS64_T zip64local_SearchCentralDir(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream) {
+    unsigned char* buf;
+    ZPOS64_T uSizeFile;
+    ZPOS64_T uBackRead;
+    ZPOS64_T uMaxBack=0xffff; /* maximum size of global comment */
+    ZPOS64_T uPosFound=0;
+
+    if (ZSEEK64(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0)
+        return 0;
+
+
+    uSizeFile = ZTELL64(*pzlib_filefunc_def,filestream);
+
+    if (uMaxBack>uSizeFile)
+        uMaxBack = uSizeFile;
+
+    buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4);
+    if (buf==NULL)
+        return 0;
+
+    uBackRead = 4;
+    while (uBackRead<uMaxBack)
+    {
+        uLong uReadSize;
+        ZPOS64_T uReadPos ;
+        int i;
+        if (uBackRead+BUFREADCOMMENT>uMaxBack)
+            uBackRead = uMaxBack;
+        else
+            uBackRead+=BUFREADCOMMENT;
+        uReadPos = uSizeFile-uBackRead ;
+
+        uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ?
+                     (BUFREADCOMMENT+4) : (uLong)(uSizeFile-uReadPos);
+        if (ZSEEK64(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0)
+            break;
+
+        if (ZREAD64(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize)
+            break;
+
+        for (i=(int)uReadSize-3; (i--)>0;)
+            if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) &&
+                ((*(buf+i+2))==0x05) && ((*(buf+i+3))==0x06))
+            {
+                uPosFound = uReadPos+(unsigned)i;
+                break;
+            }
+
+        if (uPosFound!=0)
+            break;
+    }
+    free(buf);
+    return uPosFound;
+}
+
+/*
+Locate the End of Zip64 Central directory locator and from there find the CD of a zipfile (at the end, just before
+the global comment)
+*/
+local ZPOS64_T zip64local_SearchCentralDir64(const zlib_filefunc64_32_def* pzlib_filefunc_def, voidpf filestream) {
+    unsigned char* buf;
+    ZPOS64_T uSizeFile;
+    ZPOS64_T uBackRead;
+    ZPOS64_T uMaxBack=0xffff; /* maximum size of global comment */
+    ZPOS64_T uPosFound=0;
+    uLong uL;
+    ZPOS64_T relativeOffset;
+
+    if (ZSEEK64(*pzlib_filefunc_def,filestream,0,ZLIB_FILEFUNC_SEEK_END) != 0)
+        return 0;
+
+    uSizeFile = ZTELL64(*pzlib_filefunc_def,filestream);
+
+    if (uMaxBack>uSizeFile)
+        uMaxBack = uSizeFile;
+
+    buf = (unsigned char*)ALLOC(BUFREADCOMMENT+4);
+    if (buf==NULL)
+        return 0;
+
+    uBackRead = 4;
+    while (uBackRead<uMaxBack)
+    {
+        uLong uReadSize;
+        ZPOS64_T uReadPos;
+        int i;
+        if (uBackRead+BUFREADCOMMENT>uMaxBack)
+            uBackRead = uMaxBack;
+        else
+            uBackRead+=BUFREADCOMMENT;
+        uReadPos = uSizeFile-uBackRead ;
+
+        uReadSize = ((BUFREADCOMMENT+4) < (uSizeFile-uReadPos)) ?
+                     (BUFREADCOMMENT+4) : (uLong)(uSizeFile-uReadPos);
+        if (ZSEEK64(*pzlib_filefunc_def,filestream,uReadPos,ZLIB_FILEFUNC_SEEK_SET)!=0)
+            break;
+
+        if (ZREAD64(*pzlib_filefunc_def,filestream,buf,uReadSize)!=uReadSize)
+            break;
+
+        for (i=(int)uReadSize-3; (i--)>0;)
+        {
+            // Signature "0x07064b50" Zip64 end of central directory locator
+            if (((*(buf+i))==0x50) && ((*(buf+i+1))==0x4b) && ((*(buf+i+2))==0x06) && ((*(buf+i+3))==0x07))
+            {
+                uPosFound = uReadPos+(unsigned)i;
+                break;
+            }
+        }
+
+        if (uPosFound!=0)
+            break;
+    }
+
+    free(buf);
+    if (uPosFound == 0)
+        return 0;
+
+    /* Zip64 end of central directory locator */
+    if (ZSEEK64(*pzlib_filefunc_def,filestream, uPosFound,ZLIB_FILEFUNC_SEEK_SET)!=0)
+        return 0;
+
+    /* the signature, already checked */
+    if (zip64local_getLong(pzlib_filefunc_def,filestream,&uL)!=ZIP_OK)
+        return 0;
+
+    /* number of the disk with the start of the zip64 end of central directory */
+    if (zip64local_getLong(pzlib_filefunc_def,filestream,&uL)!=ZIP_OK)
+        return 0;
+    if (uL != 0)
+        return 0;
+
+    /* relative offset of the zip64 end of central directory record */
+    if (zip64local_getLong64(pzlib_filefunc_def,filestream,&relativeOffset)!=ZIP_OK)
+        return 0;
+
+    /* total number of disks */
+    if (zip64local_getLong(pzlib_filefunc_def,filestream,&uL)!=ZIP_OK)
+        return 0;
+    if (uL != 1)
+        return 0;
+
+    /* Goto Zip64 end of central directory record */
+    if (ZSEEK64(*pzlib_filefunc_def,filestream, relativeOffset,ZLIB_FILEFUNC_SEEK_SET)!=0)
+        return 0;
+
+    /* the signature */
+    if (zip64local_getLong(pzlib_filefunc_def,filestream,&uL)!=ZIP_OK)
+        return 0;
+
+    if (uL != 0x06064b50) // signature of 'Zip64 end of central directory'
+        return 0;
+
+    return relativeOffset;
+}
+
+local int LoadCentralDirectoryRecord(zip64_internal* pziinit) {
+    int err=ZIP_OK;
+    ZPOS64_T byte_before_the_zipfile;/* byte before the zipfile, (>0 for sfx)*/
+
+    ZPOS64_T size_central_dir;     /* size of the central directory  */
+    ZPOS64_T offset_central_dir;   /* offset of start of central directory */
+    ZPOS64_T central_pos;
+    uLong uL;
+
+    uLong number_disk;          /* number of the current disk, used for
spanning ZIP, unsupported, always 0*/ + uLong number_disk_with_CD; /* number of the disk with central dir, used + for spanning ZIP, unsupported, always 0*/ + ZPOS64_T number_entry; + ZPOS64_T number_entry_CD; /* total number of entries in + the central dir + (same than number_entry on nospan) */ + uLong VersionMadeBy; + uLong VersionNeeded; + uLong size_comment; + + int hasZIP64Record = 0; + + // check first if we find a ZIP64 record + central_pos = zip64local_SearchCentralDir64(&pziinit->z_filefunc,pziinit->filestream); + if(central_pos > 0) + { + hasZIP64Record = 1; + } + else if(central_pos == 0) + { + central_pos = zip64local_SearchCentralDir(&pziinit->z_filefunc,pziinit->filestream); + } + +/* disable to allow appending to empty ZIP archive + if (central_pos==0) + err=ZIP_ERRNO; +*/ + + if(hasZIP64Record) + { + ZPOS64_T sizeEndOfCentralDirectory; + if (ZSEEK64(pziinit->z_filefunc, pziinit->filestream, central_pos, ZLIB_FILEFUNC_SEEK_SET) != 0) + err=ZIP_ERRNO; + + /* the signature, already checked */ + if (zip64local_getLong(&pziinit->z_filefunc, pziinit->filestream,&uL)!=ZIP_OK) + err=ZIP_ERRNO; + + /* size of zip64 end of central directory record */ + if (zip64local_getLong64(&pziinit->z_filefunc, pziinit->filestream, &sizeEndOfCentralDirectory)!=ZIP_OK) + err=ZIP_ERRNO; + + /* version made by */ + if (zip64local_getShort(&pziinit->z_filefunc, pziinit->filestream, &VersionMadeBy)!=ZIP_OK) + err=ZIP_ERRNO; + + /* version needed to extract */ + if (zip64local_getShort(&pziinit->z_filefunc, pziinit->filestream, &VersionNeeded)!=ZIP_OK) + err=ZIP_ERRNO; + + /* number of this disk */ + if (zip64local_getLong(&pziinit->z_filefunc, pziinit->filestream,&number_disk)!=ZIP_OK) + err=ZIP_ERRNO; + + /* number of the disk with the start of the central directory */ + if (zip64local_getLong(&pziinit->z_filefunc, pziinit->filestream,&number_disk_with_CD)!=ZIP_OK) + err=ZIP_ERRNO; + + /* total number of entries in the central directory on this disk */ + if (zip64local_getLong64(&pziinit->z_filefunc, pziinit->filestream, &number_entry)!=ZIP_OK) + err=ZIP_ERRNO; + + /* total number of entries in the central directory */ + if (zip64local_getLong64(&pziinit->z_filefunc, pziinit->filestream,&number_entry_CD)!=ZIP_OK) + err=ZIP_ERRNO; + + if ((number_entry_CD!=number_entry) || (number_disk_with_CD!=0) || (number_disk!=0)) + err=ZIP_BADZIPFILE; + + /* size of the central directory */ + if (zip64local_getLong64(&pziinit->z_filefunc, pziinit->filestream,&size_central_dir)!=ZIP_OK) + err=ZIP_ERRNO; + + /* offset of start of central directory with respect to the + starting disk number */ + if (zip64local_getLong64(&pziinit->z_filefunc, pziinit->filestream,&offset_central_dir)!=ZIP_OK) + err=ZIP_ERRNO; + + // TODO.. + // read the comment from the standard central header. 
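+    // For reference, the zip64 end of central directory record parsed above
+    // is laid out as follows (sizes in bytes, per the ZIP specification):
+    //   signature (0x06064b50)                        4
+    //   size of the record (from the next field on)   8
+    //   version made by                               2
+    //   version needed to extract                     2
+    //   number of this disk                           4
+    //   disk with the start of the central directory  4
+    //   entries in the central directory on this disk 8
+    //   total entries in the central directory        8
+    //   size of the central directory                 8
+    //   offset of the central directory               8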
+    size_comment = 0;
+  }
+  else
+  {
+    // Read End of central Directory info
+    if (ZSEEK64(pziinit->z_filefunc, pziinit->filestream, central_pos,ZLIB_FILEFUNC_SEEK_SET)!=0)
+      err=ZIP_ERRNO;
+
+    /* the signature, already checked */
+    if (zip64local_getLong(&pziinit->z_filefunc, pziinit->filestream,&uL)!=ZIP_OK)
+      err=ZIP_ERRNO;
+
+    /* number of this disk */
+    if (zip64local_getShort(&pziinit->z_filefunc, pziinit->filestream,&number_disk)!=ZIP_OK)
+      err=ZIP_ERRNO;
+
+    /* number of the disk with the start of the central directory */
+    if (zip64local_getShort(&pziinit->z_filefunc, pziinit->filestream,&number_disk_with_CD)!=ZIP_OK)
+      err=ZIP_ERRNO;
+
+    /* total number of entries in the central dir on this disk */
+    number_entry = 0;
+    if (zip64local_getShort(&pziinit->z_filefunc, pziinit->filestream, &uL)!=ZIP_OK)
+      err=ZIP_ERRNO;
+    else
+      number_entry = uL;
+
+    /* total number of entries in the central dir */
+    number_entry_CD = 0;
+    if (zip64local_getShort(&pziinit->z_filefunc, pziinit->filestream, &uL)!=ZIP_OK)
+      err=ZIP_ERRNO;
+    else
+      number_entry_CD = uL;
+
+    if ((number_entry_CD!=number_entry) || (number_disk_with_CD!=0) || (number_disk!=0))
+      err=ZIP_BADZIPFILE;
+
+    /* size of the central directory */
+    size_central_dir = 0;
+    if (zip64local_getLong(&pziinit->z_filefunc, pziinit->filestream, &uL)!=ZIP_OK)
+      err=ZIP_ERRNO;
+    else
+      size_central_dir = uL;
+
+    /* offset of start of central directory with respect to the starting disk number */
+    offset_central_dir = 0;
+    if (zip64local_getLong(&pziinit->z_filefunc, pziinit->filestream, &uL)!=ZIP_OK)
+      err=ZIP_ERRNO;
+    else
+      offset_central_dir = uL;
+
+
+    /* zipfile global comment length */
+    if (zip64local_getShort(&pziinit->z_filefunc, pziinit->filestream, &size_comment)!=ZIP_OK)
+      err=ZIP_ERRNO;
+  }
+
+  if ((central_pos<offset_central_dir+size_central_dir) &&
+      (err==ZIP_OK))
+    err=ZIP_BADZIPFILE;
+
+  if (err!=ZIP_OK)
+  {
+    ZCLOSE64(pziinit->z_filefunc, pziinit->filestream);
+    return ZIP_ERRNO;
+  }
+
+  if (size_comment>0)
+  {
+    pziinit->globalcomment = (char*)ALLOC(size_comment+1);
+    if (pziinit->globalcomment)
+    {
+      size_comment = ZREAD64(pziinit->z_filefunc, pziinit->filestream, pziinit->globalcomment,size_comment);
+      pziinit->globalcomment[size_comment]=0;
+    }
+  }
+
+  byte_before_the_zipfile = central_pos - (offset_central_dir+size_central_dir);
+  pziinit->add_position_when_writing_offset = byte_before_the_zipfile;
+
+  {
+    ZPOS64_T size_central_dir_to_read = size_central_dir;
+    size_t buf_size = SIZEDATA_INDATABLOCK;
+    void* buf_read = (void*)ALLOC(buf_size);
+    if (ZSEEK64(pziinit->z_filefunc, pziinit->filestream, offset_central_dir + byte_before_the_zipfile, ZLIB_FILEFUNC_SEEK_SET) != 0)
+      err=ZIP_ERRNO;
+
+    while ((size_central_dir_to_read>0) && (err==ZIP_OK))
+    {
+      ZPOS64_T read_this = SIZEDATA_INDATABLOCK;
+      if (read_this > size_central_dir_to_read)
+        read_this = size_central_dir_to_read;
+
+      if (ZREAD64(pziinit->z_filefunc, pziinit->filestream,buf_read,(uLong)read_this) != read_this)
+        err=ZIP_ERRNO;
+
+      if (err==ZIP_OK)
+        err = add_data_in_datablock(&pziinit->central_dir,buf_read, (uLong)read_this);
+
+      size_central_dir_to_read-=read_this;
+    }
+    free(buf_read);
+  }
+  pziinit->begin_pos = byte_before_the_zipfile;
+  pziinit->number_entry = number_entry_CD;
+
+  if (ZSEEK64(pziinit->z_filefunc, pziinit->filestream, offset_central_dir+byte_before_the_zipfile,ZLIB_FILEFUNC_SEEK_SET) != 0)
+    err=ZIP_ERRNO;
+
+  return err;
+}
+
+
+#endif /* !NO_ADDFILEINEXISTINGZIP*/
+
+
+/************************************************************/
+extern zipFile ZEXPORT zipOpen3(const void *pathname, int append, zipcharpc* globalcomment, zlib_filefunc64_32_def*
pzlib_filefunc64_32_def) { + zip64_internal ziinit; + zip64_internal* zi; + int err=ZIP_OK; + + ziinit.z_filefunc.zseek32_file = NULL; + ziinit.z_filefunc.ztell32_file = NULL; + if (pzlib_filefunc64_32_def==NULL) + fill_fopen64_filefunc(&ziinit.z_filefunc.zfile_func64); + else + ziinit.z_filefunc = *pzlib_filefunc64_32_def; + + ziinit.filestream = ZOPEN64(ziinit.z_filefunc, + pathname, + (append == APPEND_STATUS_CREATE) ? + (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_CREATE) : + (ZLIB_FILEFUNC_MODE_READ | ZLIB_FILEFUNC_MODE_WRITE | ZLIB_FILEFUNC_MODE_EXISTING)); + + if (ziinit.filestream == NULL) + return NULL; + + if (append == APPEND_STATUS_CREATEAFTER) + ZSEEK64(ziinit.z_filefunc,ziinit.filestream,0,SEEK_END); + + ziinit.begin_pos = ZTELL64(ziinit.z_filefunc,ziinit.filestream); + ziinit.in_opened_file_inzip = 0; + ziinit.ci.stream_initialised = 0; + ziinit.number_entry = 0; + ziinit.add_position_when_writing_offset = 0; + init_linkedlist(&(ziinit.central_dir)); + + + + zi = (zip64_internal*)ALLOC(sizeof(zip64_internal)); + if (zi==NULL) + { + ZCLOSE64(ziinit.z_filefunc,ziinit.filestream); + return NULL; + } + + /* now we add file in a zipfile */ +# ifndef NO_ADDFILEINEXISTINGZIP + ziinit.globalcomment = NULL; + if (append == APPEND_STATUS_ADDINZIP) + { + // Read and Cache Central Directory Records + err = LoadCentralDirectoryRecord(&ziinit); + } + + if (globalcomment) + { + *globalcomment = ziinit.globalcomment; + } +# endif /* !NO_ADDFILEINEXISTINGZIP*/ + + if (err != ZIP_OK) + { +# ifndef NO_ADDFILEINEXISTINGZIP + free(ziinit.globalcomment); +# endif /* !NO_ADDFILEINEXISTINGZIP*/ + free(zi); + return NULL; + } + else + { + *zi = ziinit; + return (zipFile)zi; + } +} + +extern zipFile ZEXPORT zipOpen2(const char *pathname, int append, zipcharpc* globalcomment, zlib_filefunc_def* pzlib_filefunc32_def) { + if (pzlib_filefunc32_def != NULL) + { + zlib_filefunc64_32_def zlib_filefunc64_32_def_fill; + fill_zlib_filefunc64_32_def_from_filefunc32(&zlib_filefunc64_32_def_fill,pzlib_filefunc32_def); + return zipOpen3(pathname, append, globalcomment, &zlib_filefunc64_32_def_fill); + } + else + return zipOpen3(pathname, append, globalcomment, NULL); +} + +extern zipFile ZEXPORT zipOpen2_64(const void *pathname, int append, zipcharpc* globalcomment, zlib_filefunc64_def* pzlib_filefunc_def) { + if (pzlib_filefunc_def != NULL) + { + zlib_filefunc64_32_def zlib_filefunc64_32_def_fill; + zlib_filefunc64_32_def_fill.zfile_func64 = *pzlib_filefunc_def; + zlib_filefunc64_32_def_fill.ztell32_file = NULL; + zlib_filefunc64_32_def_fill.zseek32_file = NULL; + return zipOpen3(pathname, append, globalcomment, &zlib_filefunc64_32_def_fill); + } + else + return zipOpen3(pathname, append, globalcomment, NULL); +} + + + +extern zipFile ZEXPORT zipOpen(const char* pathname, int append) { + return zipOpen3((const void*)pathname,append,NULL,NULL); +} + +extern zipFile ZEXPORT zipOpen64(const void* pathname, int append) { + return zipOpen3(pathname,append,NULL,NULL); +} + +local int Write_LocalFileHeader(zip64_internal* zi, const char* filename, uInt size_extrafield_local, const void* extrafield_local) { + /* write the local header */ + int err; + uInt size_filename = (uInt)strlen(filename); + uInt size_extrafield = size_extrafield_local; + + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)LOCALHEADERMAGIC, 4); + + if (err==ZIP_OK) + { + if(zi->ci.zip64) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)45,2);/* version needed to extract */ + else + err = 
zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)20,2);/* version needed to extract */ + } + + if (err==ZIP_OK) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.flag,2); + + if (err==ZIP_OK) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.method,2); + + if (err==ZIP_OK) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->ci.dosDate,4); + + // CRC / Compressed size / Uncompressed size will be filled in later and rewritten later + if (err==ZIP_OK) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* crc 32, unknown */ + if (err==ZIP_OK) + { + if(zi->ci.zip64) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0xFFFFFFFF,4); /* compressed size, unknown */ + else + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* compressed size, unknown */ + } + if (err==ZIP_OK) + { + if(zi->ci.zip64) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0xFFFFFFFF,4); /* uncompressed size, unknown */ + else + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); /* uncompressed size, unknown */ + } + + if (err==ZIP_OK) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_filename,2); + + if(zi->ci.zip64) + { + size_extrafield += 20; + } + + if (err==ZIP_OK) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_extrafield,2); + + if ((err==ZIP_OK) && (size_filename > 0)) + { + if (ZWRITE64(zi->z_filefunc,zi->filestream,filename,size_filename)!=size_filename) + err = ZIP_ERRNO; + } + + if ((err==ZIP_OK) && (size_extrafield_local > 0)) + { + if (ZWRITE64(zi->z_filefunc, zi->filestream, extrafield_local, size_extrafield_local) != size_extrafield_local) + err = ZIP_ERRNO; + } + + + if ((err==ZIP_OK) && (zi->ci.zip64)) + { + // write the Zip64 extended info + short HeaderID = 1; + short DataSize = 16; + ZPOS64_T CompressedSize = 0; + ZPOS64_T UncompressedSize = 0; + + // Remember position of Zip64 extended info for the local file header. (needed when we update size after done with file) + zi->ci.pos_zip64extrainfo = ZTELL64(zi->z_filefunc,zi->filestream); + + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, (ZPOS64_T)HeaderID,2); + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, (ZPOS64_T)DataSize,2); + + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, (ZPOS64_T)UncompressedSize,8); + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, (ZPOS64_T)CompressedSize,8); + } + + return err; +} + +/* + NOTE. + When writing RAW the ZIP64 extended information in extrafield_local and extrafield_global needs to be stripped + before calling this function it can be done with zipRemoveExtraInfoBlock + + It is not done here because then we need to realloc a new buffer since parameters are 'const' and I want to minimize + unnecessary allocations. 
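+ For example (a sketch only; pLocalExtra and localExtraLen are hypothetical
+ caller-side variables holding a writable copy of the local extra field):
+
+     int localExtraLen = (int)size_extrafield_local;
+     zipRemoveExtraInfoBlock(pLocalExtra, &localExtraLen, 0x0001);
+
+ after which pLocalExtra / localExtraLen can be passed to this function
+ together with raw=1.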
+ */ +extern int ZEXPORT zipOpenNewFileInZip4_64(zipFile file, const char* filename, const zip_fileinfo* zipfi, + const void* extrafield_local, uInt size_extrafield_local, + const void* extrafield_global, uInt size_extrafield_global, + const char* comment, int method, int level, int raw, + int windowBits,int memLevel, int strategy, + const char* password, uLong crcForCrypting, + uLong versionMadeBy, uLong flagBase, int zip64) { + zip64_internal* zi; + uInt size_filename; + uInt size_comment; + uInt i; + int err = ZIP_OK; + +# ifdef NOCRYPT + (crcForCrypting); + if (password != NULL) + return ZIP_PARAMERROR; +# endif + + if (file == NULL) + return ZIP_PARAMERROR; + +#ifdef HAVE_BZIP2 + if ((method!=0) && (method!=Z_DEFLATED) && (method!=Z_BZIP2ED)) + return ZIP_PARAMERROR; +#else + if ((method!=0) && (method!=Z_DEFLATED)) + return ZIP_PARAMERROR; +#endif + + // The filename and comment length must fit in 16 bits. + if ((filename!=NULL) && (strlen(filename)>0xffff)) + return ZIP_PARAMERROR; + if ((comment!=NULL) && (strlen(comment)>0xffff)) + return ZIP_PARAMERROR; + // The extra field length must fit in 16 bits. If the member also requires + // a Zip64 extra block, that will also need to fit within that 16-bit + // length, but that will be checked for later. + if ((size_extrafield_local>0xffff) || (size_extrafield_global>0xffff)) + return ZIP_PARAMERROR; + + zi = (zip64_internal*)file; + + if (zi->in_opened_file_inzip == 1) + { + err = zipCloseFileInZip (file); + if (err != ZIP_OK) + return err; + } + + if (filename==NULL) + filename="-"; + + if (comment==NULL) + size_comment = 0; + else + size_comment = (uInt)strlen(comment); + + size_filename = (uInt)strlen(filename); + + if (zipfi == NULL) + zi->ci.dosDate = 0; + else + { + if (zipfi->dosDate != 0) + zi->ci.dosDate = zipfi->dosDate; + else + zi->ci.dosDate = zip64local_TmzDateToDosDate(&zipfi->tmz_date); + } + + zi->ci.flag = flagBase; + if ((level==8) || (level==9)) + zi->ci.flag |= 2; + if (level==2) + zi->ci.flag |= 4; + if (level==1) + zi->ci.flag |= 6; + if (password != NULL) + zi->ci.flag |= 1; + + zi->ci.crc32 = 0; + zi->ci.method = method; + zi->ci.encrypt = 0; + zi->ci.stream_initialised = 0; + zi->ci.pos_in_buffered_data = 0; + zi->ci.raw = raw; + zi->ci.pos_local_header = ZTELL64(zi->z_filefunc,zi->filestream); + + zi->ci.size_centralheader = SIZECENTRALHEADER + size_filename + size_extrafield_global + size_comment; + zi->ci.size_centralExtraFree = 32; // Extra space we have reserved in case we need to add ZIP64 extra info data + + zi->ci.central_header = (char*)ALLOC((uInt)zi->ci.size_centralheader + zi->ci.size_centralExtraFree); + + zi->ci.size_centralExtra = size_extrafield_global; + zip64local_putValue_inmemory(zi->ci.central_header,(uLong)CENTRALHEADERMAGIC,4); + /* version info */ + zip64local_putValue_inmemory(zi->ci.central_header+4,(uLong)versionMadeBy,2); + zip64local_putValue_inmemory(zi->ci.central_header+6,(uLong)20,2); + zip64local_putValue_inmemory(zi->ci.central_header+8,(uLong)zi->ci.flag,2); + zip64local_putValue_inmemory(zi->ci.central_header+10,(uLong)zi->ci.method,2); + zip64local_putValue_inmemory(zi->ci.central_header+12,(uLong)zi->ci.dosDate,4); + zip64local_putValue_inmemory(zi->ci.central_header+16,(uLong)0,4); /*crc*/ + zip64local_putValue_inmemory(zi->ci.central_header+20,(uLong)0,4); /*compr size*/ + zip64local_putValue_inmemory(zi->ci.central_header+24,(uLong)0,4); /*uncompr size*/ + zip64local_putValue_inmemory(zi->ci.central_header+28,(uLong)size_filename,2); + 
zip64local_putValue_inmemory(zi->ci.central_header+30,(uLong)size_extrafield_global,2);
+    zip64local_putValue_inmemory(zi->ci.central_header+32,(uLong)size_comment,2);
+    zip64local_putValue_inmemory(zi->ci.central_header+34,(uLong)0,2); /*disk nm start*/
+
+    if (zipfi==NULL)
+        zip64local_putValue_inmemory(zi->ci.central_header+36,(uLong)0,2);
+    else
+        zip64local_putValue_inmemory(zi->ci.central_header+36,(uLong)zipfi->internal_fa,2);
+
+    if (zipfi==NULL)
+        zip64local_putValue_inmemory(zi->ci.central_header+38,(uLong)0,4);
+    else
+        zip64local_putValue_inmemory(zi->ci.central_header+38,(uLong)zipfi->external_fa,4);
+
+    if(zi->ci.pos_local_header >= 0xffffffff)
+        zip64local_putValue_inmemory(zi->ci.central_header+42,(uLong)0xffffffff,4);
+    else
+        zip64local_putValue_inmemory(zi->ci.central_header+42,(uLong)zi->ci.pos_local_header - zi->add_position_when_writing_offset,4);
+
+    for (i=0;i<size_filename;i++)
+        *(zi->ci.central_header+SIZECENTRALHEADER+i) = *(filename+i);
+
+    for (i=0;i<size_extrafield_global;i++)
+        *(zi->ci.central_header+SIZECENTRALHEADER+size_filename+i) =
+              *(((const char*)extrafield_global)+i);
+
+    for (i=0;i<size_comment;i++)
+        *(zi->ci.central_header+SIZECENTRALHEADER+size_filename+
+              size_extrafield_global+i) = *(comment+i);
+    if (zi->ci.central_header == NULL)
+        return ZIP_INTERNALERROR;
+
+    zi->ci.zip64 = zip64;
+    zi->ci.totalCompressedData = 0;
+    zi->ci.totalUncompressedData = 0;
+    zi->ci.pos_zip64extrainfo = 0;
+
+    err = Write_LocalFileHeader(zi, filename, size_extrafield_local, extrafield_local);
+
+#ifdef HAVE_BZIP2
+    zi->ci.bstream.avail_in = (uInt)0;
+    zi->ci.bstream.avail_out = (uInt)Z_BUFSIZE;
+    zi->ci.bstream.next_out = (char*)zi->ci.buffered_data;
+    zi->ci.bstream.total_in_hi32 = 0;
+    zi->ci.bstream.total_in_lo32 = 0;
+    zi->ci.bstream.total_out_hi32 = 0;
+    zi->ci.bstream.total_out_lo32 = 0;
+#endif
+
+    zi->ci.stream.avail_in = (uInt)0;
+    zi->ci.stream.avail_out = (uInt)Z_BUFSIZE;
+    zi->ci.stream.next_out = zi->ci.buffered_data;
+    zi->ci.stream.total_in = 0;
+    zi->ci.stream.total_out = 0;
+    zi->ci.stream.data_type = Z_BINARY;
+
+#ifdef HAVE_BZIP2
+    if ((err==ZIP_OK) && (zi->ci.method == Z_DEFLATED || zi->ci.method == Z_BZIP2ED) && (!zi->ci.raw))
+#else
+    if ((err==ZIP_OK) && (zi->ci.method == Z_DEFLATED) && (!zi->ci.raw))
+#endif
+    {
+        if(zi->ci.method == Z_DEFLATED)
+        {
+          zi->ci.stream.zalloc = (alloc_func)0;
+          zi->ci.stream.zfree = (free_func)0;
+          zi->ci.stream.opaque = (voidpf)0;
+
+          if (windowBits>0)
+              windowBits = -windowBits;
+
+          err = deflateInit2(&zi->ci.stream, level, Z_DEFLATED, windowBits, memLevel, strategy);
+
+          if (err==Z_OK)
+              zi->ci.stream_initialised = Z_DEFLATED;
+        }
+        else if(zi->ci.method == Z_BZIP2ED)
+        {
+#ifdef HAVE_BZIP2
+            // Init BZip stuff here
+          zi->ci.bstream.bzalloc = 0;
+          zi->ci.bstream.bzfree = 0;
+          zi->ci.bstream.opaque = (voidpf)0;
+
+          err = BZ2_bzCompressInit(&zi->ci.bstream, level, 0,35);
+          if(err == BZ_OK)
+              zi->ci.stream_initialised = Z_BZIP2ED;
+#endif
+        }
+
+    }
+
+# ifndef NOCRYPT
+    zi->ci.crypt_header_size = 0;
+    if ((err==Z_OK) && (password != NULL))
+    {
+        unsigned char bufHead[RAND_HEAD_LEN];
+        unsigned int sizeHead;
+        zi->ci.encrypt = 1;
+        zi->ci.pcrc_32_tab = get_crc_table();
+        /*init_keys(password,zi->ci.keys,zi->ci.pcrc_32_tab);*/
+
+        sizeHead=crypthead(password,bufHead,RAND_HEAD_LEN,zi->ci.keys,zi->ci.pcrc_32_tab,crcForCrypting);
+        zi->ci.crypt_header_size = sizeHead;
+
+        if (ZWRITE64(zi->z_filefunc,zi->filestream,bufHead,sizeHead) != sizeHead)
+            err = ZIP_ERRNO;
+    }
+# endif
+
+    if (err==Z_OK)
+        zi->in_opened_file_inzip = 1;
+    return err;
+}
+
+extern int ZEXPORT zipOpenNewFileInZip4(zipFile file, const
char* filename, const zip_fileinfo* zipfi, + const void* extrafield_local, uInt size_extrafield_local, + const void* extrafield_global, uInt size_extrafield_global, + const char* comment, int method, int level, int raw, + int windowBits,int memLevel, int strategy, + const char* password, uLong crcForCrypting, + uLong versionMadeBy, uLong flagBase) { + return zipOpenNewFileInZip4_64(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + windowBits, memLevel, strategy, + password, crcForCrypting, versionMadeBy, flagBase, 0); +} + +extern int ZEXPORT zipOpenNewFileInZip3(zipFile file, const char* filename, const zip_fileinfo* zipfi, + const void* extrafield_local, uInt size_extrafield_local, + const void* extrafield_global, uInt size_extrafield_global, + const char* comment, int method, int level, int raw, + int windowBits,int memLevel, int strategy, + const char* password, uLong crcForCrypting) { + return zipOpenNewFileInZip4_64(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + windowBits, memLevel, strategy, + password, crcForCrypting, VERSIONMADEBY, 0, 0); +} + +extern int ZEXPORT zipOpenNewFileInZip3_64(zipFile file, const char* filename, const zip_fileinfo* zipfi, + const void* extrafield_local, uInt size_extrafield_local, + const void* extrafield_global, uInt size_extrafield_global, + const char* comment, int method, int level, int raw, + int windowBits,int memLevel, int strategy, + const char* password, uLong crcForCrypting, int zip64) { + return zipOpenNewFileInZip4_64(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + windowBits, memLevel, strategy, + password, crcForCrypting, VERSIONMADEBY, 0, zip64); +} + +extern int ZEXPORT zipOpenNewFileInZip2(zipFile file, const char* filename, const zip_fileinfo* zipfi, + const void* extrafield_local, uInt size_extrafield_local, + const void* extrafield_global, uInt size_extrafield_global, + const char* comment, int method, int level, int raw) { + return zipOpenNewFileInZip4_64(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, + NULL, 0, VERSIONMADEBY, 0, 0); +} + +extern int ZEXPORT zipOpenNewFileInZip2_64(zipFile file, const char* filename, const zip_fileinfo* zipfi, + const void* extrafield_local, uInt size_extrafield_local, + const void* extrafield_global, uInt size_extrafield_global, + const char* comment, int method, int level, int raw, int zip64) { + return zipOpenNewFileInZip4_64(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, raw, + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, + NULL, 0, VERSIONMADEBY, 0, zip64); +} + +extern int ZEXPORT zipOpenNewFileInZip64(zipFile file, const char* filename, const zip_fileinfo* zipfi, + const void* extrafield_local, uInt size_extrafield_local, + const void*extrafield_global, uInt size_extrafield_global, + const char* comment, int method, int level, int zip64) { + return zipOpenNewFileInZip4_64(file, filename, zipfi, + extrafield_local, size_extrafield_local, + extrafield_global, size_extrafield_global, + comment, method, level, 0, + -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY, + NULL, 0, 
VERSIONMADEBY, 0, zip64);
+}
+
+extern int ZEXPORT zipOpenNewFileInZip(zipFile file, const char* filename, const zip_fileinfo* zipfi,
+                                       const void* extrafield_local, uInt size_extrafield_local,
+                                       const void*extrafield_global, uInt size_extrafield_global,
+                                       const char* comment, int method, int level) {
+    return zipOpenNewFileInZip4_64(file, filename, zipfi,
+                                   extrafield_local, size_extrafield_local,
+                                   extrafield_global, size_extrafield_global,
+                                   comment, method, level, 0,
+                                   -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY,
+                                   NULL, 0, VERSIONMADEBY, 0, 0);
+}
+
+local int zip64FlushWriteBuffer(zip64_internal* zi) {
+    int err=ZIP_OK;
+
+    if (zi->ci.encrypt != 0)
+    {
+#ifndef NOCRYPT
+        uInt i;
+        int t;
+        for (i=0;i<zi->ci.pos_in_buffered_data;i++)
+            zi->ci.buffered_data[i] = zencode(zi->ci.keys, zi->ci.pcrc_32_tab, zi->ci.buffered_data[i],t);
+#endif
+    }
+
+    if (ZWRITE64(zi->z_filefunc,zi->filestream,zi->ci.buffered_data,zi->ci.pos_in_buffered_data) != zi->ci.pos_in_buffered_data)
+        err = ZIP_ERRNO;
+
+    zi->ci.totalCompressedData += zi->ci.pos_in_buffered_data;
+
+#ifdef HAVE_BZIP2
+    if(zi->ci.method == Z_BZIP2ED)
+    {
+        zi->ci.totalUncompressedData += zi->ci.bstream.total_in_lo32;
+        zi->ci.bstream.total_in_lo32 = 0;
+        zi->ci.bstream.total_in_hi32 = 0;
+    }
+    else
+#endif
+    {
+        zi->ci.totalUncompressedData += zi->ci.stream.total_in;
+        zi->ci.stream.total_in = 0;
+    }
+
+
+    zi->ci.pos_in_buffered_data = 0;
+
+    return err;
+}
+
+extern int ZEXPORT zipWriteInFileInZip(zipFile file, const void* buf, unsigned int len) {
+    zip64_internal* zi;
+    int err=ZIP_OK;
+
+    if (file == NULL)
+        return ZIP_PARAMERROR;
+    zi = (zip64_internal*)file;
+
+    if (zi->in_opened_file_inzip == 0)
+        return ZIP_PARAMERROR;
+
+    zi->ci.crc32 = crc32(zi->ci.crc32,buf,(uInt)len);
+
+#ifdef HAVE_BZIP2
+    if(zi->ci.method == Z_BZIP2ED && (!zi->ci.raw))
+    {
+        zi->ci.bstream.next_in = (void*)buf;
+        zi->ci.bstream.avail_in = len;
+        err = BZ_RUN_OK;
+
+        while ((err==BZ_RUN_OK) && (zi->ci.bstream.avail_in>0))
+        {
+            if (zi->ci.bstream.avail_out == 0)
+            {
+                if (zip64FlushWriteBuffer(zi) == ZIP_ERRNO)
+                    err = ZIP_ERRNO;
+                zi->ci.bstream.avail_out = (uInt)Z_BUFSIZE;
+                zi->ci.bstream.next_out = (char*)zi->ci.buffered_data;
+            }
+
+
+            if(err != BZ_RUN_OK)
+                break;
+
+            if ((zi->ci.method == Z_BZIP2ED) && (!zi->ci.raw))
+            {
+                uLong uTotalOutBefore_lo = zi->ci.bstream.total_out_lo32;
+//                uLong uTotalOutBefore_hi = zi->ci.bstream.total_out_hi32;
+                err=BZ2_bzCompress(&zi->ci.bstream, BZ_RUN);
+
+                zi->ci.pos_in_buffered_data += (uInt)(zi->ci.bstream.total_out_lo32 - uTotalOutBefore_lo) ;
+            }
+        }
+
+        if(err == BZ_RUN_OK)
+            err = ZIP_OK;
+    }
+    else
+#endif
+    {
+        zi->ci.stream.next_in = (Bytef*)(uintptr_t)buf;
+        zi->ci.stream.avail_in = len;
+
+        while ((err==ZIP_OK) && (zi->ci.stream.avail_in>0))
+        {
+            if (zi->ci.stream.avail_out == 0)
+            {
+                if (zip64FlushWriteBuffer(zi) == ZIP_ERRNO)
+                    err = ZIP_ERRNO;
+                zi->ci.stream.avail_out = (uInt)Z_BUFSIZE;
+                zi->ci.stream.next_out = zi->ci.buffered_data;
+            }
+
+
+            if(err != ZIP_OK)
+                break;
+
+            if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw))
+            {
+                uLong uTotalOutBefore = zi->ci.stream.total_out;
+                err=deflate(&zi->ci.stream, Z_NO_FLUSH);
+
+                zi->ci.pos_in_buffered_data += (uInt)(zi->ci.stream.total_out - uTotalOutBefore) ;
+            }
+            else
+            {
+                uInt copy_this,i;
+                if (zi->ci.stream.avail_in < zi->ci.stream.avail_out)
+                    copy_this = zi->ci.stream.avail_in;
+                else
+                    copy_this = zi->ci.stream.avail_out;
+
+                /* Stored (method 0) or raw mode: copy the input verbatim
+                   into buffered_data, then advance the stream counters by
+                   hand in the block that follows. */
+                for (i = 0; i < copy_this; i++)
+                    *(((char*)zi->ci.stream.next_out)+i) =
+                        *(((const char*)zi->ci.stream.next_in)+i);
+
{ + zi->ci.stream.avail_in -= copy_this; + zi->ci.stream.avail_out-= copy_this; + zi->ci.stream.next_in+= copy_this; + zi->ci.stream.next_out+= copy_this; + zi->ci.stream.total_in+= copy_this; + zi->ci.stream.total_out+= copy_this; + zi->ci.pos_in_buffered_data += copy_this; + } + } + }// while(...) + } + + return err; +} + +extern int ZEXPORT zipCloseFileInZipRaw(zipFile file, uLong uncompressed_size, uLong crc32) { + return zipCloseFileInZipRaw64 (file, uncompressed_size, crc32); +} + +extern int ZEXPORT zipCloseFileInZipRaw64(zipFile file, ZPOS64_T uncompressed_size, uLong crc32) { + zip64_internal* zi; + ZPOS64_T compressed_size; + uLong invalidValue = 0xffffffff; + unsigned datasize = 0; + int err=ZIP_OK; + + if (file == NULL) + return ZIP_PARAMERROR; + zi = (zip64_internal*)file; + + if (zi->in_opened_file_inzip == 0) + return ZIP_PARAMERROR; + zi->ci.stream.avail_in = 0; + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + { + while (err==ZIP_OK) + { + uLong uTotalOutBefore; + if (zi->ci.stream.avail_out == 0) + { + if (zip64FlushWriteBuffer(zi) == ZIP_ERRNO) + err = ZIP_ERRNO; + zi->ci.stream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.stream.next_out = zi->ci.buffered_data; + } + uTotalOutBefore = zi->ci.stream.total_out; + err=deflate(&zi->ci.stream, Z_FINISH); + zi->ci.pos_in_buffered_data += (uInt)(zi->ci.stream.total_out - uTotalOutBefore) ; + } + } + else if ((zi->ci.method == Z_BZIP2ED) && (!zi->ci.raw)) + { +#ifdef HAVE_BZIP2 + err = BZ_FINISH_OK; + while (err==BZ_FINISH_OK) + { + uLong uTotalOutBefore; + if (zi->ci.bstream.avail_out == 0) + { + if (zip64FlushWriteBuffer(zi) == ZIP_ERRNO) + err = ZIP_ERRNO; + zi->ci.bstream.avail_out = (uInt)Z_BUFSIZE; + zi->ci.bstream.next_out = (char*)zi->ci.buffered_data; + } + uTotalOutBefore = zi->ci.bstream.total_out_lo32; + err=BZ2_bzCompress(&zi->ci.bstream, BZ_FINISH); + if(err == BZ_STREAM_END) + err = Z_STREAM_END; + + zi->ci.pos_in_buffered_data += (uInt)(zi->ci.bstream.total_out_lo32 - uTotalOutBefore); + } + + if(err == BZ_FINISH_OK) + err = ZIP_OK; +#endif + } + + if (err==Z_STREAM_END) + err=ZIP_OK; /* this is normal */ + + if ((zi->ci.pos_in_buffered_data>0) && (err==ZIP_OK)) + { + if (zip64FlushWriteBuffer(zi)==ZIP_ERRNO) + err = ZIP_ERRNO; + } + + if ((zi->ci.method == Z_DEFLATED) && (!zi->ci.raw)) + { + int tmp_err = deflateEnd(&zi->ci.stream); + if (err == ZIP_OK) + err = tmp_err; + zi->ci.stream_initialised = 0; + } +#ifdef HAVE_BZIP2 + else if((zi->ci.method == Z_BZIP2ED) && (!zi->ci.raw)) + { + int tmperr = BZ2_bzCompressEnd(&zi->ci.bstream); + if (err==ZIP_OK) + err = tmperr; + zi->ci.stream_initialised = 0; + } +#endif + + if (!zi->ci.raw) + { + crc32 = (uLong)zi->ci.crc32; + uncompressed_size = zi->ci.totalUncompressedData; + } + compressed_size = zi->ci.totalCompressedData; + +# ifndef NOCRYPT + compressed_size += zi->ci.crypt_header_size; +# endif + + // update Current Item crc and sizes, + if(compressed_size >= 0xffffffff || uncompressed_size >= 0xffffffff || zi->ci.pos_local_header >= 0xffffffff) + { + /*version Made by*/ + zip64local_putValue_inmemory(zi->ci.central_header+4,(uLong)45,2); + /*version needed*/ + zip64local_putValue_inmemory(zi->ci.central_header+6,(uLong)45,2); + + } + + zip64local_putValue_inmemory(zi->ci.central_header+16,crc32,4); /*crc*/ + + + if(compressed_size >= 0xffffffff) + zip64local_putValue_inmemory(zi->ci.central_header+20, invalidValue,4); /*compr size*/ + else + zip64local_putValue_inmemory(zi->ci.central_header+20, compressed_size,4); /*compr size*/ + + /// set internal file 
attributes field + if (zi->ci.stream.data_type == Z_ASCII) + zip64local_putValue_inmemory(zi->ci.central_header+36,(uLong)Z_ASCII,2); + + if(uncompressed_size >= 0xffffffff) + zip64local_putValue_inmemory(zi->ci.central_header+24, invalidValue,4); /*uncompr size*/ + else + zip64local_putValue_inmemory(zi->ci.central_header+24, uncompressed_size,4); /*uncompr size*/ + + // Add ZIP64 extra info field for uncompressed size + if(uncompressed_size >= 0xffffffff) + datasize += 8; + + // Add ZIP64 extra info field for compressed size + if(compressed_size >= 0xffffffff) + datasize += 8; + + // Add ZIP64 extra info field for relative offset to local file header of current file + if(zi->ci.pos_local_header >= 0xffffffff) + datasize += 8; + + if(datasize > 0) + { + char* p = NULL; + + if((uLong)(datasize + 4) > zi->ci.size_centralExtraFree) + { + // we cannot write more data to the buffer that we have room for. + return ZIP_BADZIPFILE; + } + + p = zi->ci.central_header + zi->ci.size_centralheader; + + // Add Extra Information Header for 'ZIP64 information' + zip64local_putValue_inmemory(p, 0x0001, 2); // HeaderID + p += 2; + zip64local_putValue_inmemory(p, datasize, 2); // DataSize + p += 2; + + if(uncompressed_size >= 0xffffffff) + { + zip64local_putValue_inmemory(p, uncompressed_size, 8); + p += 8; + } + + if(compressed_size >= 0xffffffff) + { + zip64local_putValue_inmemory(p, compressed_size, 8); + p += 8; + } + + if(zi->ci.pos_local_header >= 0xffffffff) + { + zip64local_putValue_inmemory(p, zi->ci.pos_local_header, 8); + p += 8; + } + + // Update how much extra free space we got in the memory buffer + // and increase the centralheader size so the new ZIP64 fields are included + // ( 4 below is the size of HeaderID and DataSize field ) + zi->ci.size_centralExtraFree -= datasize + 4; + zi->ci.size_centralheader += datasize + 4; + + // Update the extra info size field + zi->ci.size_centralExtra += datasize + 4; + zip64local_putValue_inmemory(zi->ci.central_header+30,(uLong)zi->ci.size_centralExtra,2); + } + + if (err==ZIP_OK) + err = add_data_in_datablock(&zi->central_dir, zi->ci.central_header, (uLong)zi->ci.size_centralheader); + + free(zi->ci.central_header); + + if (err==ZIP_OK) + { + // Update the LocalFileHeader with the new values. + + ZPOS64_T cur_pos_inzip = ZTELL64(zi->z_filefunc,zi->filestream); + + if (ZSEEK64(zi->z_filefunc,zi->filestream, zi->ci.pos_local_header + 14,ZLIB_FILEFUNC_SEEK_SET)!=0) + err = ZIP_ERRNO; + + if (err==ZIP_OK) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,crc32,4); /* crc 32, unknown */ + + if(uncompressed_size >= 0xffffffff || compressed_size >= 0xffffffff ) + { + if(zi->ci.pos_zip64extrainfo > 0) + { + // Update the size in the ZIP64 extended field. 
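+          // The "+ 4" below skips the extra block's HeaderID and DataSize
+          // fields, so the two 8-byte writes land on the uncompressed- and
+          // compressed-size slots reserved by Write_LocalFileHeader.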
+ if (ZSEEK64(zi->z_filefunc,zi->filestream, zi->ci.pos_zip64extrainfo + 4,ZLIB_FILEFUNC_SEEK_SET)!=0) + err = ZIP_ERRNO; + + if (err==ZIP_OK) /* compressed size, unknown */ + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, uncompressed_size, 8); + + if (err==ZIP_OK) /* uncompressed size, unknown */ + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, compressed_size, 8); + } + else + err = ZIP_BADZIPFILE; // Caller passed zip64 = 0, so no room for zip64 info -> fatal + } + else + { + if (err==ZIP_OK) /* compressed size, unknown */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,compressed_size,4); + + if (err==ZIP_OK) /* uncompressed size, unknown */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,uncompressed_size,4); + } + + if (ZSEEK64(zi->z_filefunc,zi->filestream, cur_pos_inzip,ZLIB_FILEFUNC_SEEK_SET)!=0) + err = ZIP_ERRNO; + } + + zi->number_entry ++; + zi->in_opened_file_inzip = 0; + + return err; +} + +extern int ZEXPORT zipCloseFileInZip(zipFile file) { + return zipCloseFileInZipRaw (file,0,0); +} + +local int Write_Zip64EndOfCentralDirectoryLocator(zip64_internal* zi, ZPOS64_T zip64eocd_pos_inzip) { + int err = ZIP_OK; + ZPOS64_T pos = zip64eocd_pos_inzip - zi->add_position_when_writing_offset; + + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)ZIP64ENDLOCHEADERMAGIC,4); + + /*num disks*/ + if (err==ZIP_OK) /* number of the disk with the start of the central directory */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); + + /*relative offset*/ + if (err==ZIP_OK) /* Relative offset to the Zip64EndOfCentralDirectory */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream, pos,8); + + /*total disks*/ /* Do not support spawning of disk so always say 1 here*/ + if (err==ZIP_OK) /* number of the disk with the start of the central directory */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)1,4); + + return err; +} + +local int Write_Zip64EndOfCentralDirectoryRecord(zip64_internal* zi, uLong size_centraldir, ZPOS64_T centraldir_pos_inzip) { + int err = ZIP_OK; + + uLong Zip64DataSize = 44; + + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)ZIP64ENDHEADERMAGIC,4); + + if (err==ZIP_OK) /* size of this 'zip64 end of central directory' */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(ZPOS64_T)Zip64DataSize,8); // why ZPOS64_T of this ? 
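+    // Note: per the ZIP specification this size field excludes the 4-byte
+    // signature and the 8-byte size field itself; the fixed part of the
+    // record is 56 bytes, hence the value 44 above.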
+ + if (err==ZIP_OK) /* version made by */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)45,2); + + if (err==ZIP_OK) /* version needed */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)45,2); + + if (err==ZIP_OK) /* number of this disk */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); + + if (err==ZIP_OK) /* number of the disk with the start of the central directory */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,4); + + if (err==ZIP_OK) /* total number of entries in the central dir on this disk */ + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, zi->number_entry, 8); + + if (err==ZIP_OK) /* total number of entries in the central dir */ + err = zip64local_putValue(&zi->z_filefunc, zi->filestream, zi->number_entry, 8); + + if (err==ZIP_OK) /* size of the central directory */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(ZPOS64_T)size_centraldir,8); + + if (err==ZIP_OK) /* offset of start of central directory with respect to the starting disk number */ + { + ZPOS64_T pos = centraldir_pos_inzip - zi->add_position_when_writing_offset; + err = zip64local_putValue(&zi->z_filefunc,zi->filestream, (ZPOS64_T)pos,8); + } + return err; +} + +local int Write_EndOfCentralDirectoryRecord(zip64_internal* zi, uLong size_centraldir, ZPOS64_T centraldir_pos_inzip) { + int err = ZIP_OK; + + /*signature*/ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)ENDHEADERMAGIC,4); + + if (err==ZIP_OK) /* number of this disk */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2); + + if (err==ZIP_OK) /* number of the disk with the start of the central directory */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0,2); + + if (err==ZIP_OK) /* total number of entries in the central dir on this disk */ + { + { + if(zi->number_entry >= 0xFFFF) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0xffff,2); // use value in ZIP64 record + else + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2); + } + } + + if (err==ZIP_OK) /* total number of entries in the central dir */ + { + if(zi->number_entry >= 0xFFFF) + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)0xffff,2); // use value in ZIP64 record + else + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)zi->number_entry,2); + } + + if (err==ZIP_OK) /* size of the central directory */ + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_centraldir,4); + + if (err==ZIP_OK) /* offset of start of central directory with respect to the starting disk number */ + { + ZPOS64_T pos = centraldir_pos_inzip - zi->add_position_when_writing_offset; + if(pos >= 0xffffffff) + { + err = zip64local_putValue(&zi->z_filefunc,zi->filestream, (uLong)0xffffffff,4); + } + else + err = zip64local_putValue(&zi->z_filefunc,zi->filestream, (uLong)(centraldir_pos_inzip - zi->add_position_when_writing_offset),4); + } + + return err; +} + +local int Write_GlobalComment(zip64_internal* zi, const char* global_comment) { + int err = ZIP_OK; + uInt size_global_comment = 0; + + if(global_comment != NULL) + size_global_comment = (uInt)strlen(global_comment); + + err = zip64local_putValue(&zi->z_filefunc,zi->filestream,(uLong)size_global_comment,2); + + if (err == ZIP_OK && size_global_comment > 0) + { + if (ZWRITE64(zi->z_filefunc,zi->filestream, global_comment, size_global_comment) != size_global_comment) + err = ZIP_ERRNO; + } + return err; +} + +extern 
int ZEXPORT zipClose(zipFile file, const char* global_comment) { + zip64_internal* zi; + int err = 0; + uLong size_centraldir = 0; + ZPOS64_T centraldir_pos_inzip; + ZPOS64_T pos; + + if (file == NULL) + return ZIP_PARAMERROR; + + zi = (zip64_internal*)file; + + if (zi->in_opened_file_inzip == 1) + { + err = zipCloseFileInZip (file); + } + +#ifndef NO_ADDFILEINEXISTINGZIP + if (global_comment==NULL) + global_comment = zi->globalcomment; +#endif + + centraldir_pos_inzip = ZTELL64(zi->z_filefunc,zi->filestream); + + if (err==ZIP_OK) + { + linkedlist_datablock_internal* ldi = zi->central_dir.first_block; + while (ldi!=NULL) + { + if ((err==ZIP_OK) && (ldi->filled_in_this_block>0)) + { + if (ZWRITE64(zi->z_filefunc,zi->filestream, ldi->data, ldi->filled_in_this_block) != ldi->filled_in_this_block) + err = ZIP_ERRNO; + } + + size_centraldir += ldi->filled_in_this_block; + ldi = ldi->next_datablock; + } + } + free_linkedlist(&(zi->central_dir)); + + pos = centraldir_pos_inzip - zi->add_position_when_writing_offset; + if(pos >= 0xffffffff || zi->number_entry >= 0xFFFF) + { + ZPOS64_T Zip64EOCDpos = ZTELL64(zi->z_filefunc,zi->filestream); + Write_Zip64EndOfCentralDirectoryRecord(zi, size_centraldir, centraldir_pos_inzip); + + Write_Zip64EndOfCentralDirectoryLocator(zi, Zip64EOCDpos); + } + + if (err==ZIP_OK) + err = Write_EndOfCentralDirectoryRecord(zi, size_centraldir, centraldir_pos_inzip); + + if(err == ZIP_OK) + err = Write_GlobalComment(zi, global_comment); + + if (ZCLOSE64(zi->z_filefunc,zi->filestream) != 0) + if (err == ZIP_OK) + err = ZIP_ERRNO; + +#ifndef NO_ADDFILEINEXISTINGZIP + free(zi->globalcomment); +#endif + free(zi); + + return err; +} + +extern int ZEXPORT zipRemoveExtraInfoBlock(char* pData, int* dataLen, short sHeader) { + char* p = pData; + int size = 0; + char* pNewHeader; + char* pTmp; + short header; + short dataSize; + + int retVal = ZIP_OK; + + if(pData == NULL || dataLen == NULL || *dataLen < 4) + return ZIP_PARAMERROR; + + pNewHeader = (char*)ALLOC((unsigned)*dataLen); + pTmp = pNewHeader; + + while(p < (pData + *dataLen)) + { + header = *(short*)p; + dataSize = *(((short*)p)+1); + + if( header == sHeader ) // Header found. + { + p += dataSize + 4; // skip it. do not copy to temp buffer + } + else + { + // Extra Info block should not be removed, So copy it to the temp buffer. + memcpy(pTmp, p, dataSize + 4); + p += dataSize + 4; + size += dataSize + 4; + } + + } + + if(size < *dataLen) + { + // clean old extra info block. 
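+        // At least one block was stripped: rewrite the (now shorter) extra
+        // field in place and zero out the tail that is no longer used.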
+ memset(pData,0, *dataLen); + + // copy the new extra info block over the old + if(size > 0) + memcpy(pData, pNewHeader, size); + + // set the new extra info size + *dataLen = size; + + retVal = ZIP_OK; + } + else + retVal = ZIP_ERRNO; + + free(pNewHeader); + + return retVal; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.h new file mode 100644 index 0000000..3e230d3 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/minizip/zip.h @@ -0,0 +1,364 @@ +/* zip.h -- IO on .zip files using zlib + Version 1.1, February 14h, 2010 + part of the MiniZip project - ( http://www.winimage.com/zLibDll/minizip.html ) + + Copyright (C) 1998-2010 Gilles Vollant (minizip) ( http://www.winimage.com/zLibDll/minizip.html ) + + Modifications for Zip64 support + Copyright (C) 2009-2010 Mathias Svensson ( http://result42.com ) + + For more info read MiniZip_info.txt + + --------------------------------------------------------------------------- + + Condition of use and distribution are the same than zlib : + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + --------------------------------------------------------------------------- + + Changes + + See header of zip.h + +*/ + +#ifndef _zip12_H +#define _zip12_H + +#ifdef __cplusplus +extern "C" { +#endif + +//#define HAVE_BZIP2 + +#ifndef _ZLIB_H +#include "zlib.h" +#endif + +#ifndef _ZLIBIOAPI_H +#include "ioapi.h" +#endif + +#ifdef HAVE_BZIP2 +#include "bzlib.h" +#endif + +#define Z_BZIP2ED 12 + +#if defined(STRICTZIP) || defined(STRICTZIPUNZIP) +/* like the STRICT of WIN32, we define a pointer that cannot be converted + from (void*) without cast */ +typedef struct TagzipFile__ { int unused; } zipFile__; +typedef zipFile__ *zipFile; +#else +typedef voidp zipFile; +#endif + +#define ZIP_OK (0) +#define ZIP_EOF (0) +#define ZIP_ERRNO (Z_ERRNO) +#define ZIP_PARAMERROR (-102) +#define ZIP_BADZIPFILE (-103) +#define ZIP_INTERNALERROR (-104) + +#ifndef DEF_MEM_LEVEL +# if MAX_MEM_LEVEL >= 8 +# define DEF_MEM_LEVEL 8 +# else +# define DEF_MEM_LEVEL MAX_MEM_LEVEL +# endif +#endif +/* default memLevel */ + +/* tm_zip contain date/time info */ +typedef struct tm_zip_s +{ + int tm_sec; /* seconds after the minute - [0,59] */ + int tm_min; /* minutes after the hour - [0,59] */ + int tm_hour; /* hours since midnight - [0,23] */ + int tm_mday; /* day of the month - [1,31] */ + int tm_mon; /* months since January - [0,11] */ + int tm_year; /* years - [1980..2044] */ +} tm_zip; + +typedef struct +{ + tm_zip tmz_date; /* date in understandable format */ + uLong dosDate; /* if dos_date == 0, tmu_date is used */ +/* uLong flag; */ /* general purpose bit flag 2 bytes */ + + uLong internal_fa; /* internal file attributes 2 bytes */ + uLong external_fa; /* external file attributes 4 bytes */ +} zip_fileinfo; + +typedef const char* zipcharpc; + + +#define APPEND_STATUS_CREATE (0) +#define APPEND_STATUS_CREATEAFTER (1) +#define APPEND_STATUS_ADDINZIP (2) + +extern zipFile ZEXPORT zipOpen(const char *pathname, int append); +extern zipFile ZEXPORT zipOpen64(const void *pathname, int append); +/* + Create a zipfile. + pathname contain on Windows XP a filename like "c:\\zlib\\zlib113.zip" or on + an Unix computer "zlib/zlib113.zip". + if the file pathname exist and append==APPEND_STATUS_CREATEAFTER, the zip + will be created at the end of the file. + (useful if the file contain a self extractor code) + if the file pathname exist and append==APPEND_STATUS_ADDINZIP, we will + add files in existing zip (be sure you don't add file that doesn't exist) + If the zipfile cannot be opened, the return value is NULL. + Else, the return value is a zipFile Handle, usable with other function + of this zip package. +*/ + +/* Note : there is no delete function into a zipfile. 
+ If you want delete file into a zipfile, you must open a zipfile, and create another + Of course, you can use RAW reading and writing to copy the file you did not want delete +*/ + +extern zipFile ZEXPORT zipOpen2(const char *pathname, + int append, + zipcharpc* globalcomment, + zlib_filefunc_def* pzlib_filefunc_def); + +extern zipFile ZEXPORT zipOpen2_64(const void *pathname, + int append, + zipcharpc* globalcomment, + zlib_filefunc64_def* pzlib_filefunc_def); + +extern zipFile ZEXPORT zipOpen3(const void *pathname, + int append, + zipcharpc* globalcomment, + zlib_filefunc64_32_def* pzlib_filefunc64_32_def); + +extern int ZEXPORT zipOpenNewFileInZip(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level); + +extern int ZEXPORT zipOpenNewFileInZip64(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int zip64); + +/* + Open a file in the ZIP for writing. + filename : the filename in zip (if NULL, '-' without quote will be used + *zipfi contain supplemental information + if extrafield_local!=NULL and size_extrafield_local>0, extrafield_local + contains the extrafield data for the local header + if extrafield_global!=NULL and size_extrafield_global>0, extrafield_global + contains the extrafield data for the global header + if comment != NULL, comment contain the comment string + method contain the compression method (0 for store, Z_DEFLATED for deflate) + level contain the level of compression (can be Z_DEFAULT_COMPRESSION) + zip64 is set to 1 if a zip64 extended information block should be added to the local file header. + this MUST be '1' if the uncompressed size is >= 0xffffffff. 
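+  A minimal usage sketch (error handling omitted; the archive and entry
+  names are placeholders):
+
+      zipFile zf = zipOpen64("archive.zip", APPEND_STATUS_CREATE);
+      zipOpenNewFileInZip64(zf, "entry.txt", NULL, NULL, 0, NULL, 0,
+                            NULL, Z_DEFLATED, Z_DEFAULT_COMPRESSION, 0);
+      zipWriteInFileInZip(zf, "hello", 5);
+      zipCloseFileInZip(zf);
+      zipClose(zf, NULL);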
+ +*/ + + +extern int ZEXPORT zipOpenNewFileInZip2(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw); + + +extern int ZEXPORT zipOpenNewFileInZip2_64(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw, + int zip64); +/* + Same than zipOpenNewFileInZip, except if raw=1, we write raw file + */ + +extern int ZEXPORT zipOpenNewFileInZip3(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw, + int windowBits, + int memLevel, + int strategy, + const char* password, + uLong crcForCrypting); + +extern int ZEXPORT zipOpenNewFileInZip3_64(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw, + int windowBits, + int memLevel, + int strategy, + const char* password, + uLong crcForCrypting, + int zip64); + +/* + Same than zipOpenNewFileInZip2, except + windowBits,memLevel,,strategy : see parameter strategy in deflateInit2 + password : crypting password (NULL for no crypting) + crcForCrypting : crc of file to compress (needed for crypting) + */ + +extern int ZEXPORT zipOpenNewFileInZip4(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw, + int windowBits, + int memLevel, + int strategy, + const char* password, + uLong crcForCrypting, + uLong versionMadeBy, + uLong flagBase); + + +extern int ZEXPORT zipOpenNewFileInZip4_64(zipFile file, + const char* filename, + const zip_fileinfo* zipfi, + const void* extrafield_local, + uInt size_extrafield_local, + const void* extrafield_global, + uInt size_extrafield_global, + const char* comment, + int method, + int level, + int raw, + int windowBits, + int memLevel, + int strategy, + const char* password, + uLong crcForCrypting, + uLong versionMadeBy, + uLong flagBase, + int zip64); +/* + Same than zipOpenNewFileInZip4, except + versionMadeBy : value for Version made by field + flag : value for flag field (compression level info will be added) + */ + + +extern int ZEXPORT zipWriteInFileInZip(zipFile file, + const void* buf, + unsigned len); +/* + Write data in the zipfile +*/ + +extern int ZEXPORT zipCloseFileInZip(zipFile file); +/* + Close the current file in the zipfile +*/ + +extern int ZEXPORT zipCloseFileInZipRaw(zipFile file, + uLong uncompressed_size, + uLong crc32); + +extern int ZEXPORT zipCloseFileInZipRaw64(zipFile file, + ZPOS64_T uncompressed_size, + uLong crc32); + +/* + Close the current file in the zipfile, for file opened with + parameter raw=1 in zipOpenNewFileInZip2 + uncompressed_size and crc32 are value for the uncompressed size +*/ + +extern int ZEXPORT zipClose(zipFile file, + const char* global_comment); +/* + Close the zipfile +*/ + + +extern int 
ZEXPORT zipRemoveExtraInfoBlock(char* pData, int* dataLen, short sHeader); +/* + zipRemoveExtraInfoBlock - Added by Mathias Svensson + + Remove extra information block from a extra information data for the local file header or central directory header + + It is needed to remove ZIP64 extra information blocks when before data is written if using RAW mode. + + 0x0001 is the signature header for the ZIP64 extra information blocks + + usage. + Remove ZIP64 Extra information from a central director extra field data + zipRemoveExtraInfoBlock(pCenDirExtraFieldData, &nCenDirExtraFieldDataLen, 0x0001); + + Remove ZIP64 Extra information from a Local File Header extra field data + zipRemoveExtraInfoBlock(pLocalHeaderExtraFieldData, &nLocalHeaderExtraFieldDataLen, 0x0001); +*/ + +#ifdef __cplusplus +} +#endif + +#endif /* _zip64_H */ diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.c new file mode 100644 index 0000000..d759825 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.c @@ -0,0 +1,840 @@ +/* + * puff.c + * Copyright (C) 2002-2013 Mark Adler + * For conditions of distribution and use, see copyright notice in puff.h + * version 2.3, 21 Jan 2013 + * + * puff.c is a simple inflate written to be an unambiguous way to specify the + * deflate format. It is not written for speed but rather simplicity. As a + * side benefit, this code might actually be useful when small code is more + * important than speed, such as bootstrap applications. For typical deflate + * data, zlib's inflate() is about four times as fast as puff(). zlib's + * inflate compiles to around 20K on my machine, whereas puff.c compiles to + * around 4K on my machine (a PowerPC using GNU cc). If the faster decode() + * function here is used, then puff() is only twice as slow as zlib's + * inflate(). + * + * All dynamically allocated memory comes from the stack. The stack required + * is less than 2K bytes. This code is compatible with 16-bit int's and + * assumes that long's are at least 32 bits. puff.c uses the short data type, + * assumed to be 16 bits, for arrays in order to conserve memory. The code + * works whether integers are stored big endian or little endian. + * + * In the comments below are "Format notes" that describe the inflate process + * and document some of the less obvious aspects of the format. 
This source
+ * code is meant to supplement RFC 1951, which formally describes the deflate
+ * format:
+ *
+ *    http://www.zlib.org/rfc-deflate.html
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0  10 Feb 2002     - First version
+ * 1.1  17 Feb 2002     - Clarifications of some comments and notes
+ *                      - Update puff() dest and source pointers on negative
+ *                        errors to facilitate debugging deflators
+ *                      - Remove longest from struct huffman -- not needed
+ *                      - Simplify offs[] index in construct()
+ *                      - Add input size and checking, using longjmp() to
+ *                        maintain easy readability
+ *                      - Use short data type for large arrays
+ *                      - Use pointers instead of long to specify source and
+ *                        destination sizes to avoid arbitrary 4 GB limits
+ * 1.2  17 Mar 2002     - Add faster version of decode(), doubles speed (!),
+ *                        but leave simple version for readability
+ *                      - Make sure invalid distances detected if pointers
+ *                        are 16 bits
+ *                      - Fix fixed codes table error
+ *                      - Provide a scanning mode for determining size of
+ *                        uncompressed data
+ * 1.3  20 Mar 2002     - Go back to lengths for puff() parameters [Gailly]
+ *                      - Add a puff.h file for the interface
+ *                      - Add braces in puff() for else do [Gailly]
+ *                      - Use indexes instead of pointers for readability
+ * 1.4  31 Mar 2002     - Simplify construct() code set check
+ *                      - Fix some comments
+ *                      - Add FIXLCODES #define
+ * 1.5   6 Apr 2002     - Minor comment fixes
+ * 1.6   7 Aug 2002     - Minor format changes
+ * 1.7   3 Mar 2003     - Added test code for distribution
+ *                      - Added zlib-like license
+ * 1.8   9 Jan 2004     - Added some comments on no distance codes case
+ * 1.9  21 Feb 2008     - Fix bug on 16-bit integer architectures [Pohland]
+ *                      - Catch missing end-of-block symbol error
+ * 2.0  25 Jul 2008     - Add #define to permit distance too far back
+ *                      - Add option in TEST code for puff to write the data
+ *                      - Add option in TEST code to skip input bytes
+ *                      - Allow TEST code to read from piped stdin
+ * 2.1   4 Apr 2010     - Avoid variable initialization for happier compilers
+ *                      - Avoid unsigned comparisons for even happier compilers
+ * 2.2  25 Apr 2010     - Fix bug in variable initializations [Oberhumer]
+ *                      - Add const where appropriate [Oberhumer]
+ *                      - Split if's and ?'s for coverage testing
+ *                      - Break out test code to separate file
+ *                      - Move NIL to puff.h
+ *                      - Allow incomplete code only if single code length is 1
+ *                      - Add full code coverage test to Makefile
+ * 2.3  21 Jan 2013     - Check for invalid code length codes in dynamic blocks
+ */
+
+#include <setjmp.h>             /* for setjmp(), longjmp(), and jmp_buf */
+#include "puff.h"               /* prototype for puff() */
+
+#define local static            /* for local function definitions */
+
+/*
+ * Maximums for allocations and loops.  It is not useful to change these --
+ * they are fixed by the deflate format.
+ */ +#define MAXBITS 15 /* maximum bits in a code */ +#define MAXLCODES 286 /* maximum number of literal/length codes */ +#define MAXDCODES 30 /* maximum number of distance codes */ +#define MAXCODES (MAXLCODES+MAXDCODES) /* maximum codes lengths to read */ +#define FIXLCODES 288 /* number of fixed literal/length codes */ + +/* input and output state */ +struct state { + /* output state */ + unsigned char *out; /* output buffer */ + unsigned long outlen; /* available space at out */ + unsigned long outcnt; /* bytes written to out so far */ + + /* input state */ + const unsigned char *in; /* input buffer */ + unsigned long inlen; /* available input at in */ + unsigned long incnt; /* bytes read so far */ + int bitbuf; /* bit buffer */ + int bitcnt; /* number of bits in bit buffer */ + + /* input limit error return state for bits() and decode() */ + jmp_buf env; +}; + +/* + * Return need bits from the input stream. This always leaves less than + * eight bits in the buffer. bits() works properly for need == 0. + * + * Format notes: + * + * - Bits are stored in bytes from the least significant bit to the most + * significant bit. Therefore bits are dropped from the bottom of the bit + * buffer, using shift right, and new bytes are appended to the top of the + * bit buffer, using shift left. + */ +local int bits(struct state *s, int need) +{ + long val; /* bit accumulator (can use up to 20 bits) */ + + /* load at least need bits into val */ + val = s->bitbuf; + while (s->bitcnt < need) { + if (s->incnt == s->inlen) + longjmp(s->env, 1); /* out of input */ + val |= (long)(s->in[s->incnt++]) << s->bitcnt; /* load eight bits */ + s->bitcnt += 8; + } + + /* drop need bits and update buffer, always zero to seven bits left */ + s->bitbuf = (int)(val >> need); + s->bitcnt -= need; + + /* return need bits, zeroing the bits above that */ + return (int)(val & ((1L << need) - 1)); +} + +/* + * Process a stored block. + * + * Format notes: + * + * - After the two-bit stored block type (00), the stored block length and + * stored bytes are byte-aligned for fast copying. Therefore any leftover + * bits in the byte that has the last bit of the type, as many as seven, are + * discarded. The value of the discarded bits are not defined and should not + * be checked against any expectation. + * + * - The second inverted copy of the stored block length does not have to be + * checked, but it's probably a good idea to do so anyway. + * + * - A stored block can have zero length. This is sometimes used to byte-align + * subsets of the compressed data for random access or partial recovery. + */ +local int stored(struct state *s) +{ + unsigned len; /* length of stored block */ + + /* discard leftover bits from current byte (assumes s->bitcnt < 8) */ + s->bitbuf = 0; + s->bitcnt = 0; + + /* get length and check against its one's complement */ + if (s->incnt + 4 > s->inlen) + return 2; /* not enough input */ + len = s->in[s->incnt++]; + len |= s->in[s->incnt++] << 8; + if (s->in[s->incnt++] != (~len & 0xff) || + s->in[s->incnt++] != ((~len >> 8) & 0xff)) + return -2; /* didn't match complement! 
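+                                        (editor's worked example: for
+                                        LEN = 0x0102 the four header bytes
+                                        are 02 01 fd fe -- LEN little-endian,
+                                        then its one's complement NLEN)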
*/ + + /* copy len bytes from in to out */ + if (s->incnt + len > s->inlen) + return 2; /* not enough input */ + if (s->out != NIL) { + if (s->outcnt + len > s->outlen) + return 1; /* not enough output space */ + while (len--) + s->out[s->outcnt++] = s->in[s->incnt++]; + } + else { /* just scanning */ + s->outcnt += len; + s->incnt += len; + } + + /* done with a valid stored block */ + return 0; +} + +/* + * Huffman code decoding tables. count[1..MAXBITS] is the number of symbols of + * each length, which for a canonical code are stepped through in order. + * symbol[] are the symbol values in canonical order, where the number of + * entries is the sum of the counts in count[]. The decoding process can be + * seen in the function decode() below. + */ +struct huffman { + short *count; /* number of symbols of each length */ + short *symbol; /* canonically ordered symbols */ +}; + +/* + * Decode a code from the stream s using huffman table h. Return the symbol or + * a negative value if there is an error. If all of the lengths are zero, i.e. + * an empty code, or if the code is incomplete and an invalid code is received, + * then -10 is returned after reading MAXBITS bits. + * + * Format notes: + * + * - The codes as stored in the compressed data are bit-reversed relative to + * a simple integer ordering of codes of the same lengths. Hence below the + * bits are pulled from the compressed data one at a time and used to + * build the code value reversed from what is in the stream in order to + * permit simple integer comparisons for decoding. A table-based decoding + * scheme (as used in zlib) does not need to do this reversal. + * + * - The first code for the shortest length is all zeros. Subsequent codes of + * the same length are simply integer increments of the previous code. When + * moving up a length, a zero bit is appended to the code. For a complete + * code, the last code of the longest length will be all ones. + * + * - Incomplete codes are handled by this decoder, since they are permitted + * in the deflate format. See the format notes for fixed() and dynamic(). + */ +#ifdef SLOW +local int decode(struct state *s, const struct huffman *h) +{ + int len; /* current number of bits in code */ + int code; /* len bits being decoded */ + int first; /* first code of length len */ + int count; /* number of codes of length len */ + int index; /* index of first code of length len in symbol table */ + + code = first = index = 0; + for (len = 1; len <= MAXBITS; len++) { + code |= bits(s, 1); /* get next bit */ + count = h->count[len]; + if (code - count < first) /* if length len, return symbol */ + return h->symbol[index + (code - first)]; + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + } + return -10; /* ran out of codes */ +} + +/* + * A faster version of decode() for real applications of this code. It's not + * as readable, but it makes puff() twice as fast. And it only makes the code + * a few percent larger. 
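+ *
+ * Editor's worked example, valid for either version: four symbols A,B,C,D
+ * with code lengths {2,1,3,3} get the canonical codes B=0, A=10, C=110,
+ * D=111, so count[] = {0,1,1,2} and symbol[] = {B,A,C,D}.  Decoding the
+ * bits 1,1,0: at len 1 the code 1 is past the single length-1 code, at
+ * len 2 the code 3 is past the single length-2 code, and at len 3 the
+ * code 6 equals first, so the result is symbol[2 + (6 - 6)] = C.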
+ */ +#else /* !SLOW */ +local int decode(struct state *s, const struct huffman *h) +{ + int len; /* current number of bits in code */ + int code; /* len bits being decoded */ + int first; /* first code of length len */ + int count; /* number of codes of length len */ + int index; /* index of first code of length len in symbol table */ + int bitbuf; /* bits from stream */ + int left; /* bits left in next or left to process */ + short *next; /* next number of codes */ + + bitbuf = s->bitbuf; + left = s->bitcnt; + code = first = index = 0; + len = 1; + next = h->count + 1; + while (1) { + while (left--) { + code |= bitbuf & 1; + bitbuf >>= 1; + count = *next++; + if (code - count < first) { /* if length len, return symbol */ + s->bitbuf = bitbuf; + s->bitcnt = (s->bitcnt - len) & 7; + return h->symbol[index + (code - first)]; + } + index += count; /* else update for next length */ + first += count; + first <<= 1; + code <<= 1; + len++; + } + left = (MAXBITS+1) - len; + if (left == 0) + break; + if (s->incnt == s->inlen) + longjmp(s->env, 1); /* out of input */ + bitbuf = s->in[s->incnt++]; + if (left > 8) + left = 8; + } + return -10; /* ran out of codes */ +} +#endif /* SLOW */ + +/* + * Given the list of code lengths length[0..n-1] representing a canonical + * Huffman code for n symbols, construct the tables required to decode those + * codes. Those tables are the number of codes of each length, and the symbols + * sorted by length, retaining their original order within each length. The + * return value is zero for a complete code set, negative for an over- + * subscribed code set, and positive for an incomplete code set. The tables + * can be used if the return value is zero or positive, but they cannot be used + * if the return value is negative. If the return value is zero, it is not + * possible for decode() using that table to return an error--any stream of + * enough bits will resolve to a symbol. If the return value is positive, then + * it is possible for decode() using that table to return an error for received + * codes past the end of the incomplete lengths. + * + * Not used by decode(), but used for error checking, h->count[0] is the number + * of the n symbols not in the code. So n - h->count[0] is the number of + * codes. This is useful for checking for incomplete codes that have more than + * one symbol, which is an error in a dynamic block. + * + * Assumption: for all i in 0..n-1, 0 <= length[i] <= MAXBITS + * This is assured by the construction of the length arrays in dynamic() and + * fixed() and is not verified by construct(). + * + * Format notes: + * + * - Permitted and expected examples of incomplete codes are one of the fixed + * codes and any code with a single symbol which in deflate is coded as one + * bit instead of zero bits. See the format notes for fixed() and dynamic(). + * + * - Within a given code length, the symbols are kept in ascending order for + * the code bits definition. 
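+ *
+ * Editor's worked example, reusing the four-symbol code from the decode()
+ * note above (lengths {2,1,3,3}): the over-subscription check computes
+ * left = 2-1, 2-1, 2-2 = 0 at lengths 1..3, so the code is complete and
+ * construct() returns zero; the offsets come out offs[1..3] = {0,1,2},
+ * which places the symbols as symbol[] = {B,A,C,D}.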
+ */ +local int construct(struct huffman *h, const short *length, int n) +{ + int symbol; /* current symbol when stepping through length[] */ + int len; /* current length when stepping through h->count[] */ + int left; /* number of possible codes left of current length */ + short offs[MAXBITS+1]; /* offsets in symbol table for each length */ + + /* count number of codes of each length */ + for (len = 0; len <= MAXBITS; len++) + h->count[len] = 0; + for (symbol = 0; symbol < n; symbol++) + (h->count[length[symbol]])++; /* assumes lengths are within bounds */ + if (h->count[0] == n) /* no codes! */ + return 0; /* complete, but decode() will fail */ + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; /* one possible code of zero length */ + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; /* one more bit, double codes left */ + left -= h->count[len]; /* deduct count from possible codes */ + if (left < 0) + return left; /* over-subscribed--return negative */ + } /* left > 0 means incomplete */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + h->count[len]; + + /* + * put symbols in table sorted by length, by symbol order within each + * length + */ + for (symbol = 0; symbol < n; symbol++) + if (length[symbol] != 0) + h->symbol[offs[length[symbol]]++] = symbol; + + /* return zero for complete set, positive for incomplete set */ + return left; +} + +/* + * Decode literal/length and distance codes until an end-of-block code. + * + * Format notes: + * + * - Compressed data that is after the block type if fixed or after the code + * description if dynamic is a combination of literals and length/distance + * pairs terminated by and end-of-block code. Literals are simply Huffman + * coded bytes. A length/distance pair is a coded length followed by a + * coded distance to represent a string that occurs earlier in the + * uncompressed data that occurs again at the current location. + * + * - Literals, lengths, and the end-of-block code are combined into a single + * code of up to 286 symbols. They are 256 literals (0..255), 29 length + * symbols (257..285), and the end-of-block symbol (256). + * + * - There are 256 possible lengths (3..258), and so 29 symbols are not enough + * to represent all of those. Lengths 3..10 and 258 are in fact represented + * by just a length symbol. Lengths 11..257 are represented as a symbol and + * some number of extra bits that are added as an integer to the base length + * of the length symbol. The number of extra bits is determined by the base + * length symbol. These are in the static arrays below, lens[] for the base + * lengths and lext[] for the corresponding number of extra bits. + * + * - The reason that 258 gets its own symbol is that the longest length is used + * often in highly redundant files. Note that 258 can also be coded as the + * base value 227 plus the maximum extra value of 31. While a good deflate + * should never do this, it is not an error, and should be decoded properly. + * + * - If a length is decoded, including its extra bits if any, then it is + * followed a distance code. There are up to 30 distance symbols. Again + * there are many more possible distances (1..32768), so extra bits are added + * to a base value represented by the symbol. The distances 1..4 get their + * own symbol, but the rest require extra bits. 
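+ *   (Editor's example: distance symbol 6 has base 9 with 2 extra bits in
+ *   the tables below, so it covers distances 9..12, and symbol 7 picks up
+ *   at base 13.)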
The base distances and + * corresponding number of extra bits are below in the static arrays dist[] + * and dext[]. + * + * - Literal bytes are simply written to the output. A length/distance pair is + * an instruction to copy previously uncompressed bytes to the output. The + * copy is from distance bytes back in the output stream, copying for length + * bytes. + * + * - Distances pointing before the beginning of the output data are not + * permitted. + * + * - Overlapped copies, where the length is greater than the distance, are + * allowed and common. For example, a distance of one and a length of 258 + * simply copies the last byte 258 times. A distance of four and a length of + * twelve copies the last four bytes three times. A simple forward copy + * ignoring whether the length is greater than the distance or not implements + * this correctly. You should not use memcpy() since its behavior is not + * defined for overlapped arrays. You should not use memmove() or bcopy() + * since though their behavior -is- defined for overlapping arrays, it is + * defined to do the wrong thing in this case. + */ +local int codes(struct state *s, + const struct huffman *lencode, + const struct huffman *distcode) +{ + int symbol; /* decoded symbol */ + int len; /* length for copy */ + unsigned dist; /* distance for copy */ + static const short lens[29] = { /* Size base for length codes 257..285 */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258}; + static const short lext[29] = { /* Extra bits for length codes 257..285 */ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, + 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0}; + static const short dists[30] = { /* Offset base for distance codes 0..29 */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577}; + static const short dext[30] = { /* Extra bits for distance codes 0..29 */ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, + 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, + 12, 12, 13, 13}; + + /* decode literals and length/distance pairs */ + do { + symbol = decode(s, lencode); + if (symbol < 0) + return symbol; /* invalid symbol */ + if (symbol < 256) { /* literal: symbol is the byte */ + /* write out the literal */ + if (s->out != NIL) { + if (s->outcnt == s->outlen) + return 1; + s->out[s->outcnt] = symbol; + } + s->outcnt++; + } + else if (symbol > 256) { /* length */ + /* get and compute length */ + symbol -= 257; + if (symbol >= 29) + return -10; /* invalid fixed code */ + len = lens[symbol] + bits(s, lext[symbol]); + + /* get and check distance */ + symbol = decode(s, distcode); + if (symbol < 0) + return symbol; /* invalid symbol */ + dist = dists[symbol] + bits(s, dext[symbol]); +#ifndef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (dist > s->outcnt) + return -11; /* distance too far back */ +#endif + + /* copy length bytes from distance bytes back */ + if (s->out != NIL) { + if (s->outcnt + len > s->outlen) + return 1; + while (len--) { + s->out[s->outcnt] = +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + dist > s->outcnt ? + 0 : +#endif + s->out[s->outcnt - dist]; + s->outcnt++; + } + } + else + s->outcnt += len; + } + } while (symbol != 256); /* end of block symbol */ + + /* done with a valid fixed or dynamic block */ + return 0; +} + +/* + * Process a fixed codes block. 
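+ *
+ * (Editor's summary of the fixed code, matching RFC 1951 and the loops
+ * below: literal/length symbols 0..143 get 8-bit codes, 144..255 get
+ * 9 bits, 256..279 get 7 bits, 280..287 get 8 bits, and all of the
+ * distance symbols get 5-bit codes.)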
+ * + * Format notes: + * + * - This block type can be useful for compressing small amounts of data for + * which the size of the code descriptions in a dynamic block exceeds the + * benefit of custom codes for that block. For fixed codes, no bits are + * spent on code descriptions. Instead the code lengths for literal/length + * codes and distance codes are fixed. The specific lengths for each symbol + * can be seen in the "for" loops below. + * + * - The literal/length code is complete, but has two symbols that are invalid + * and should result in an error if received. This cannot be implemented + * simply as an incomplete code since those two symbols are in the "middle" + * of the code. They are eight bits long and the longest literal/length\ + * code is nine bits. Therefore the code must be constructed with those + * symbols, and the invalid symbols must be detected after decoding. + * + * - The fixed distance codes also have two invalid symbols that should result + * in an error if received. Since all of the distance codes are the same + * length, this can be implemented as an incomplete code. Then the invalid + * codes are detected while decoding. + */ +local int fixed(struct state *s) +{ + static int virgin = 1; + static short lencnt[MAXBITS+1], lensym[FIXLCODES]; + static short distcnt[MAXBITS+1], distsym[MAXDCODES]; + static struct huffman lencode, distcode; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + int symbol; + short lengths[FIXLCODES]; + + /* construct lencode and distcode */ + lencode.count = lencnt; + lencode.symbol = lensym; + distcode.count = distcnt; + distcode.symbol = distsym; + + /* literal/length table */ + for (symbol = 0; symbol < 144; symbol++) + lengths[symbol] = 8; + for (; symbol < 256; symbol++) + lengths[symbol] = 9; + for (; symbol < 280; symbol++) + lengths[symbol] = 7; + for (; symbol < FIXLCODES; symbol++) + lengths[symbol] = 8; + construct(&lencode, lengths, FIXLCODES); + + /* distance table */ + for (symbol = 0; symbol < MAXDCODES; symbol++) + lengths[symbol] = 5; + construct(&distcode, lengths, MAXDCODES); + + /* do this just once */ + virgin = 0; + } + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Process a dynamic codes block. + * + * Format notes: + * + * - A dynamic block starts with a description of the literal/length and + * distance codes for that block. New dynamic blocks allow the compressor to + * rapidly adapt to changing data with new codes optimized for that data. + * + * - The codes used by the deflate format are "canonical", which means that + * the actual bits of the codes are generated in an unambiguous way simply + * from the number of bits in each code. Therefore the code descriptions + * are simply a list of code lengths for each symbol. + * + * - The code lengths are stored in order for the symbols, so lengths are + * provided for each of the literal/length symbols, and for each of the + * distance symbols. + * + * - If a symbol is not used in the block, this is represented by a zero as the + * code length. This does not mean a zero-length code, but rather that no + * code should be created for this symbol. There is no way in the deflate + * format to represent a zero-length code. + * + * - The maximum number of bits in a code is 15, so the possible lengths for + * any code are 1..15. + * + * - The fact that a length of zero is not permitted for a code has an + * interesting consequence. 
Normally if only one symbol is used for a given + * code, then in fact that code could be represented with zero bits. However + * in deflate, that code has to be at least one bit. So for example, if + * only a single distance base symbol appears in a block, then it will be + * represented by a single code of length one, in particular one 0 bit. This + * is an incomplete code, since if a 1 bit is received, it has no meaning, + * and should result in an error. So incomplete distance codes of one symbol + * should be permitted, and the receipt of invalid codes should be handled. + * + * - It is also possible to have a single literal/length code, but that code + * must be the end-of-block code, since every dynamic block has one. This + * is not the most efficient way to create an empty block (an empty fixed + * block is fewer bits), but it is allowed by the format. So incomplete + * literal/length codes of one symbol should also be permitted. + * + * - If there are only literal codes and no lengths, then there are no distance + * codes. This is represented by one distance code with zero bits. + * + * - The list of up to 286 length/literal lengths and up to 30 distance lengths + * are themselves compressed using Huffman codes and run-length encoding. In + * the list of code lengths, a 0 symbol means no code, a 1..15 symbol means + * that length, and the symbols 16, 17, and 18 are run-length instructions. + * Each of 16, 17, and 18 are followed by extra bits to define the length of + * the run. 16 copies the last length 3 to 6 times. 17 represents 3 to 10 + * zero lengths, and 18 represents 11 to 138 zero lengths. Unused symbols + * are common, hence the special coding for zero lengths. + * + * - The symbols for 0..18 are Huffman coded, and so that code must be + * described first. This is simply a sequence of up to 19 three-bit values + * representing no code (0) or the code length for that symbol (1..7). + * + * - A dynamic block starts with three fixed-size counts from which is computed + * the number of literal/length code lengths, the number of distance code + * lengths, and the number of code length code lengths (ok, you come up with + * a better name!) in the code descriptions. For the literal/length and + * distance codes, lengths after those provided are considered zero, i.e. no + * code. The code length code lengths are received in a permuted order (see + * the order[] array below) to make a short code length code length list more + * likely. As it turns out, very short and very long codes are less likely + * to be seen in a dynamic code description, hence what may appear initially + * to be a peculiar ordering. + * + * - Given the number of literal/length code lengths (nlen) and distance code + * lengths (ndist), then they are treated as one long list of nlen + ndist + * code lengths. Therefore run-length coding can and often does cross the + * boundary between the two sets of lengths. + * + * - So to summarize, the code description at the start of a dynamic block is + * three counts for the number of code lengths for the literal/length codes, + * the distance codes, and the code length codes. This is followed by the + * code length code lengths, three bits each. This is used to construct the + * code length code which is used to read the remainder of the lengths. Then + * the literal/length code lengths and distance lengths are read as a single + * set of lengths using the code length codes. 
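+ *   (Editor's example of the run-length coding: ten length-8 codes in a row
+ *   are sent as the symbol 8, then 16 with extra value 3 for six repeats,
+ *   then 16 with extra value 0 for three more; 30 consecutive unused
+ *   symbols are just 18 with extra value 19, i.e. 11 + 19 zeros.)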
Codes are constructed from + * the resulting two sets of lengths, and then finally you can start + * decoding actual compressed data in the block. + * + * - For reference, a "typical" size for the code description in a dynamic + * block is around 80 bytes. + */ +local int dynamic(struct state *s) +{ + int nlen, ndist, ncode; /* number of lengths in descriptor */ + int index; /* index of lengths[] */ + int err; /* construct() return value */ + short lengths[MAXCODES]; /* descriptor code lengths */ + short lencnt[MAXBITS+1], lensym[MAXLCODES]; /* lencode memory */ + short distcnt[MAXBITS+1], distsym[MAXDCODES]; /* distcode memory */ + struct huffman lencode, distcode; /* length and distance codes */ + static const short order[19] = /* permutation of code length codes */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* construct lencode and distcode */ + lencode.count = lencnt; + lencode.symbol = lensym; + distcode.count = distcnt; + distcode.symbol = distsym; + + /* get number of lengths in each table, check lengths */ + nlen = bits(s, 5) + 257; + ndist = bits(s, 5) + 1; + ncode = bits(s, 4) + 4; + if (nlen > MAXLCODES || ndist > MAXDCODES) + return -3; /* bad counts */ + + /* read code length code lengths (really), missing lengths are zero */ + for (index = 0; index < ncode; index++) + lengths[order[index]] = bits(s, 3); + for (; index < 19; index++) + lengths[order[index]] = 0; + + /* build huffman table for code lengths codes (use lencode temporarily) */ + err = construct(&lencode, lengths, 19); + if (err != 0) /* require complete code set here */ + return -4; + + /* read length/literal and distance code length tables */ + index = 0; + while (index < nlen + ndist) { + int symbol; /* decoded value */ + int len; /* last length to repeat */ + + symbol = decode(s, &lencode); + if (symbol < 0) + return symbol; /* invalid symbol */ + if (symbol < 16) /* length in 0..15 */ + lengths[index++] = symbol; + else { /* repeat instruction */ + len = 0; /* assume repeating zeros */ + if (symbol == 16) { /* repeat last length 3..6 times */ + if (index == 0) + return -5; /* no last length! */ + len = lengths[index - 1]; /* last length */ + symbol = 3 + bits(s, 2); + } + else if (symbol == 17) /* repeat zero 3..10 times */ + symbol = 3 + bits(s, 3); + else /* == 18, repeat zero 11..138 times */ + symbol = 11 + bits(s, 7); + if (index + symbol > nlen + ndist) + return -6; /* too many lengths! */ + while (symbol--) /* repeat last or zero symbol times */ + lengths[index++] = len; + } + } + + /* check for end-of-block code -- there better be one! */ + if (lengths[256] == 0) + return -9; + + /* build huffman table for literal/length codes */ + err = construct(&lencode, lengths, nlen); + if (err && (err < 0 || nlen != lencode.count[0] + lencode.count[1])) + return -7; /* incomplete code ok only for single length 1 code */ + + /* build huffman table for distance codes */ + err = construct(&distcode, lengths + nlen, ndist); + if (err && (err < 0 || ndist != distcode.count[0] + distcode.count[1])) + return -8; /* incomplete code ok only for single length 1 code */ + + /* decode data until end-of-block code */ + return codes(s, &lencode, &distcode); +} + +/* + * Inflate source to dest. On return, destlen and sourcelen are updated to the + * size of the uncompressed data and the size of the deflate data respectively. + * On success, the return value of puff() is zero. If there is an error in the + * source data, i.e. it is not in the deflate format, then a negative value is + * returned. 
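+ * (Editor's sketch of the usual two-pass call pattern, using the no-output
+ * sizing mode described below -- src and srclen are placeholders:
+ *
+ *     unsigned long destlen, sourcelen = srclen;
+ *     int ret = puff(NIL, &destlen, src, &sourcelen);
+ *     if (ret == 0) {
+ *         unsigned char *dest = malloc(destlen);
+ *         sourcelen = srclen;
+ *         ret = puff(dest, &destlen, src, &sourcelen);
+ *     }
+ *
+ * pufftest.c in this directory uses this pattern.)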
If there is not enough input available or there is not enough + * output space, then a positive error is returned. In that case, destlen and + * sourcelen are not updated to facilitate retrying from the beginning with the + * provision of more input data or more output space. In the case of invalid + * inflate data (a negative error), the dest and source pointers are updated to + * facilitate the debugging of deflators. + * + * puff() also has a mode to determine the size of the uncompressed output with + * no output written. For this dest must be (unsigned char *)0. In this case, + * the input value of *destlen is ignored, and on return *destlen is set to the + * size of the uncompressed output. + * + * The return codes are: + * + * 2: available inflate data did not terminate + * 1: output space exhausted before completing inflate + * 0: successful inflate + * -1: invalid block type (type == 3) + * -2: stored block length did not match one's complement + * -3: dynamic block code description: too many length or distance codes + * -4: dynamic block code description: code lengths codes incomplete + * -5: dynamic block code description: repeat lengths with no first length + * -6: dynamic block code description: repeat more than specified lengths + * -7: dynamic block code description: invalid literal/length code lengths + * -8: dynamic block code description: invalid distance code lengths + * -9: dynamic block code description: missing end-of-block code + * -10: invalid literal/length or distance code in fixed or dynamic block + * -11: distance is too far back in fixed or dynamic block + * + * Format notes: + * + * - Three bits are read for each block to determine the kind of block and + * whether or not it is the last block. Then the block is decoded and the + * process repeated if it was not the last block. + * + * - The leftover bits in the last byte of the deflate data after the last + * block (if it was a fixed or dynamic block) are undefined and have no + * expected values to check. + */ +int puff(unsigned char *dest, /* pointer to destination pointer */ + unsigned long *destlen, /* amount of output space */ + const unsigned char *source, /* pointer to source data pointer */ + unsigned long *sourcelen) /* amount of input available */ +{ + struct state s; /* input/output state */ + int last, type; /* block information */ + int err; /* return value */ + + /* initialize output state */ + s.out = dest; + s.outlen = *destlen; /* ignored if dest is NIL */ + s.outcnt = 0; + + /* initialize input state */ + s.in = source; + s.inlen = *sourcelen; + s.incnt = 0; + s.bitbuf = 0; + s.bitcnt = 0; + + /* return if bits() or decode() tries to read past available input */ + if (setjmp(s.env) != 0) /* if came back here via longjmp() */ + err = 2; /* then skip do-loop, return error */ + else { + /* process blocks until last block or error */ + do { + last = bits(&s, 1); /* one if last block */ + type = bits(&s, 2); /* block type 0..3 */ + err = type == 0 ? + stored(&s) : + (type == 1 ? + fixed(&s) : + (type == 2 ? 
+                            dynamic(&s) :
+                            -1));       /* type == 3, invalid */
+            if (err != 0)
+                break;                  /* return with error */
+        } while (!last);
+    }
+
+    /* update the lengths and return */
+    if (err <= 0) {
+        *destlen = s.outcnt;
+        *sourcelen = s.incnt;
+    }
+    return err;
+}
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.h b/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.h
new file mode 100644
index 0000000..e23a245
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/puff.h
@@ -0,0 +1,35 @@
+/* puff.h
+  Copyright (C) 2002-2013 Mark Adler, all rights reserved
+  version 2.3, 21 Jan 2013
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the author be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Mark Adler    madler@alumni.caltech.edu
+ */
+
+
+/*
+ * See puff.c for purpose and usage.
+ */
+#ifndef NIL
+#  define NIL ((unsigned char *)0)      /* for no output option */
+#endif
+
+int puff(unsigned char *dest,           /* pointer to destination pointer */
+         unsigned long *destlen,        /* amount of output space */
+         const unsigned char *source,   /* pointer to source data pointer */
+         unsigned long *sourcelen);     /* amount of input available */
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/pufftest.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/pufftest.c
new file mode 100644
index 0000000..5f72ecc
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/puff/pufftest.c
@@ -0,0 +1,165 @@
+/*
+ * pufftest.c
+ * Copyright (C) 2002-2013 Mark Adler
+ * For conditions of distribution and use, see copyright notice in puff.h
+ * version 2.3, 21 Jan 2013
+ */
+
+/* Example of how to use puff().
+
+   Usage: puff [-w] [-f] [-nnn] file
+          ... | puff [-w] [-f] [-nnn]
+
+   where file is the input file with deflate data, nnn is the number of bytes
+   of input to skip before inflating (e.g. to skip a zlib or gzip header), and
+   -w is used to write the decompressed data to stdout.  -f is for coverage
+   testing, and causes pufftest to fail with not enough output space (-f does
+   a write like -w, so -w is not required). */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "puff.h"
+
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+# include <fcntl.h>
+# include <io.h>
+# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#else
+# define SET_BINARY_MODE(file)
+#endif
+
+#define local static
+
+/* Return size times approximately the cube root of 2, keeping the result as 1,
+   3, or 5 times a power of 2 -- the result is always > size, until the result
+   is the maximum value of an unsigned long, where it remains.  This is useful
+   to keep reallocations less than ~33% over the actual data. */
+local size_t bythirds(size_t size)
+{
+    int n;
+    size_t m;
+
+    m = size;
+    for (n = 0; m; n++)
+        m >>= 1;
+    if (n < 3)
+        return size + 1;
+    n -= 3;
+    m = size >> n;
+    m += m == 6 ?
2 : 1; + m <<= n; + return m > size ? m : (size_t)(-1); +} + +/* Read the input file *name, or stdin if name is NULL, into allocated memory. + Reallocate to larger buffers until the entire file is read in. Return a + pointer to the allocated data, or NULL if there was a memory allocation + failure. *len is the number of bytes of data read from the input file (even + if load() returns NULL). If the input file was empty or could not be opened + or read, *len is zero. */ +local void *load(const char *name, size_t *len) +{ + size_t size; + void *buf, *swap; + FILE *in; + + *len = 0; + buf = malloc(size = 4096); + if (buf == NULL) + return NULL; + in = name == NULL ? stdin : fopen(name, "rb"); + if (in != NULL) { + for (;;) { + *len += fread((char *)buf + *len, 1, size - *len, in); + if (*len < size) break; + size = bythirds(size); + if (size == *len || (swap = realloc(buf, size)) == NULL) { + free(buf); + buf = NULL; + break; + } + buf = swap; + } + fclose(in); + } + return buf; +} + +int main(int argc, char **argv) +{ + int ret, put = 0, fail = 0; + unsigned skip = 0; + char *arg, *name = NULL; + unsigned char *source = NULL, *dest; + size_t len = 0; + unsigned long sourcelen, destlen; + + /* process arguments */ + while (arg = *++argv, --argc) + if (arg[0] == '-') { + if (arg[1] == 'w' && arg[2] == 0) + put = 1; + else if (arg[1] == 'f' && arg[2] == 0) + fail = 1, put = 1; + else if (arg[1] >= '0' && arg[1] <= '9') + skip = (unsigned)atoi(arg + 1); + else { + fprintf(stderr, "invalid option %s\n", arg); + return 3; + } + } + else if (name != NULL) { + fprintf(stderr, "only one file name allowed\n"); + return 3; + } + else + name = arg; + source = load(name, &len); + if (source == NULL) { + fprintf(stderr, "memory allocation failure\n"); + return 4; + } + if (len == 0) { + fprintf(stderr, "could not read %s, or it was empty\n", + name == NULL ? 
"" : name); + free(source); + return 3; + } + if (skip >= len) { + fprintf(stderr, "skip request of %d leaves no input\n", skip); + free(source); + return 3; + } + + /* test inflate data with offset skip */ + len -= skip; + sourcelen = (unsigned long)len; + ret = puff(NIL, &destlen, source + skip, &sourcelen); + if (ret) + fprintf(stderr, "puff() failed with return code %d\n", ret); + else { + fprintf(stderr, "puff() succeeded uncompressing %lu bytes\n", destlen); + if (sourcelen < len) fprintf(stderr, "%lu compressed bytes unused\n", + len - sourcelen); + } + + /* if requested, inflate again and write decompressed data to stdout */ + if (put && ret == 0) { + if (fail) + destlen >>= 1; + dest = malloc(destlen); + if (dest == NULL) { + fprintf(stderr, "memory allocation failure\n"); + free(source); + return 4; + } + puff(dest, &destlen, source + skip, &sourcelen); + SET_BINARY_MODE(stdout); + fwrite(dest, 1, destlen, stdout); + free(dest); + } + + /* clean up */ + free(source); + return ret; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/testzlib/testzlib.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/testzlib/testzlib.c new file mode 100644 index 0000000..c4b0148 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/testzlib/testzlib.c @@ -0,0 +1,275 @@ +#include +#include +#include + +#include "zlib.h" + + +void MyDoMinus64(LARGE_INTEGER *R,LARGE_INTEGER A,LARGE_INTEGER B) +{ + R->HighPart = A.HighPart - B.HighPart; + if (A.LowPart >= B.LowPart) + R->LowPart = A.LowPart - B.LowPart; + else + { + R->LowPart = A.LowPart - B.LowPart; + R->HighPart --; + } +} + +#ifdef _M_X64 +// see http://msdn2.microsoft.com/library/twchhe95(en-us,vs.80).aspx for __rdtsc +unsigned __int64 __rdtsc(void); +void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64) +{ + // printf("rdtsc = %I64x\n",__rdtsc()); + pbeginTime64->QuadPart=__rdtsc(); +} + +LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER LIres; + unsigned _int64 res=__rdtsc()-((unsigned _int64)(beginTime64.QuadPart)); + LIres.QuadPart=res; + // printf("rdtsc = %I64x\n",__rdtsc()); + return LIres; +} +#else +#ifdef _M_IX86 +void myGetRDTSC32(LARGE_INTEGER * pbeginTime64) +{ + DWORD dwEdx,dwEax; + _asm + { + rdtsc + mov dwEax,eax + mov dwEdx,edx + } + pbeginTime64->LowPart=dwEax; + pbeginTime64->HighPart=dwEdx; +} + +void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64) +{ + myGetRDTSC32(pbeginTime64); +} + +LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER LIres,endTime64; + myGetRDTSC32(&endTime64); + + LIres.LowPart=LIres.HighPart=0; + MyDoMinus64(&LIres,endTime64,beginTime64); + return LIres; +} +#else +void myGetRDTSC32(LARGE_INTEGER * pbeginTime64) +{ +} + +void BeginCountRdtsc(LARGE_INTEGER * pbeginTime64) +{ +} + +LARGE_INTEGER GetResRdtsc(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER lr; + lr.QuadPart=0; + return lr; +} +#endif +#endif + +void BeginCountPerfCounter(LARGE_INTEGER * pbeginTime64,BOOL fComputeTimeQueryPerf) +{ + if ((!fComputeTimeQueryPerf) || (!QueryPerformanceCounter(pbeginTime64))) + { + pbeginTime64->LowPart = GetTickCount(); + pbeginTime64->HighPart = 0; + } +} + +DWORD GetMsecSincePerfCounter(LARGE_INTEGER beginTime64,BOOL fComputeTimeQueryPerf) +{ + LARGE_INTEGER endTime64,ticksPerSecond,ticks; + DWORDLONG ticksShifted,tickSecShifted; + DWORD dwLog=16+0; + DWORD dwRet; + if ((!fComputeTimeQueryPerf) || (!QueryPerformanceCounter(&endTime64))) + dwRet = (GetTickCount() - 
beginTime64.LowPart)*1; + else + { + MyDoMinus64(&ticks,endTime64,beginTime64); + QueryPerformanceFrequency(&ticksPerSecond); + + + { + ticksShifted = Int64ShrlMod32(*(DWORDLONG*)&ticks,dwLog); + tickSecShifted = Int64ShrlMod32(*(DWORDLONG*)&ticksPerSecond,dwLog); + + } + + dwRet = (DWORD)((((DWORD)ticksShifted)*1000)/(DWORD)(tickSecShifted)); + dwRet *=1; + } + return dwRet; +} + +int ReadFileMemory(const char* filename,long* plFileSize,unsigned char** pFilePtr) +{ + FILE* stream; + unsigned char* ptr; + int retVal=1; + stream=fopen(filename, "rb"); + if (stream==NULL) + return 0; + + fseek(stream,0,SEEK_END); + + *plFileSize=ftell(stream); + fseek(stream,0,SEEK_SET); + ptr=malloc((*plFileSize)+1); + if (ptr==NULL) + retVal=0; + else + { + if (fread(ptr, 1, *plFileSize,stream) != (*plFileSize)) + retVal=0; + } + fclose(stream); + *pFilePtr=ptr; + return retVal; +} + +int main(int argc, char *argv[]) +{ + int BlockSizeCompress=0x8000; + int BlockSizeUncompress=0x8000; + int cprLevel=Z_DEFAULT_COMPRESSION ; + long lFileSize; + unsigned char* FilePtr; + long lBufferSizeCpr; + long lBufferSizeUncpr; + long lCompressedSize=0; + unsigned char* CprPtr; + unsigned char* UncprPtr; + long lSizeCpr,lSizeUncpr; + DWORD dwGetTick,dwMsecQP; + LARGE_INTEGER li_qp,li_rdtsc,dwResRdtsc; + + if (argc<=1) + { + printf("run TestZlib [BlockSizeCompress] [BlockSizeUncompress] [compres. level]\n"); + return 0; + } + + if (ReadFileMemory(argv[1],&lFileSize,&FilePtr)==0) + { + printf("error reading %s\n",argv[1]); + return 1; + } + else printf("file %s read, %ld bytes\n",argv[1],lFileSize); + + if (argc>=3) + BlockSizeCompress=atol(argv[2]); + + if (argc>=4) + BlockSizeUncompress=atol(argv[3]); + + if (argc>=5) + cprLevel=(int)atol(argv[4]); + + lBufferSizeCpr = lFileSize + (lFileSize/0x10) + 0x200; + lBufferSizeUncpr = lBufferSizeCpr; + + CprPtr=(unsigned char*)malloc(lBufferSizeCpr + BlockSizeCompress); + + BeginCountPerfCounter(&li_qp,TRUE); + dwGetTick=GetTickCount(); + BeginCountRdtsc(&li_rdtsc); + { + z_stream zcpr; + int ret=Z_OK; + long lOrigToDo = lFileSize; + long lOrigDone = 0; + int step=0; + memset(&zcpr,0,sizeof(z_stream)); + deflateInit(&zcpr,cprLevel); + + zcpr.next_in = FilePtr; + zcpr.next_out = CprPtr; + + + do + { + long all_read_before = zcpr.total_in; + zcpr.avail_in = min(lOrigToDo,BlockSizeCompress); + zcpr.avail_out = BlockSizeCompress; + ret=deflate(&zcpr,(zcpr.avail_in==lOrigToDo) ? 
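+                /* editor's note: the final chunk of input is flushed with
+                   Z_FINISH, which ends the deflate stream and makes deflate()
+                   return Z_STREAM_END; earlier chunks use Z_SYNC_FLUSH, so
+                   deflate() keeps returning Z_OK and the while loop below
+                   continues */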
Z_FINISH : Z_SYNC_FLUSH); + lOrigDone += (zcpr.total_in-all_read_before); + lOrigToDo -= (zcpr.total_in-all_read_before); + step++; + } while (ret==Z_OK); + + lSizeCpr=zcpr.total_out; + deflateEnd(&zcpr); + dwGetTick=GetTickCount()-dwGetTick; + dwMsecQP=GetMsecSincePerfCounter(li_qp,TRUE); + dwResRdtsc=GetResRdtsc(li_rdtsc,TRUE); + printf("total compress size = %u, in %u step\n",lSizeCpr,step); + printf("time = %u msec = %f sec\n",dwGetTick,dwGetTick/(double)1000.); + printf("defcpr time QP = %u msec = %f sec\n",dwMsecQP,dwMsecQP/(double)1000.); + printf("defcpr result rdtsc = %I64x\n\n",dwResRdtsc.QuadPart); + } + + CprPtr=(unsigned char*)realloc(CprPtr,lSizeCpr); + UncprPtr=(unsigned char*)malloc(lBufferSizeUncpr + BlockSizeUncompress); + + BeginCountPerfCounter(&li_qp,TRUE); + dwGetTick=GetTickCount(); + BeginCountRdtsc(&li_rdtsc); + { + z_stream zcpr; + int ret=Z_OK; + long lOrigToDo = lSizeCpr; + long lOrigDone = 0; + int step=0; + memset(&zcpr,0,sizeof(z_stream)); + inflateInit(&zcpr); + + zcpr.next_in = CprPtr; + zcpr.next_out = UncprPtr; + + + do + { + long all_read_before = zcpr.total_in; + zcpr.avail_in = min(lOrigToDo,BlockSizeUncompress); + zcpr.avail_out = BlockSizeUncompress; + ret=inflate(&zcpr,Z_SYNC_FLUSH); + lOrigDone += (zcpr.total_in-all_read_before); + lOrigToDo -= (zcpr.total_in-all_read_before); + step++; + } while (ret==Z_OK); + + lSizeUncpr=zcpr.total_out; + inflateEnd(&zcpr); + dwGetTick=GetTickCount()-dwGetTick; + dwMsecQP=GetMsecSincePerfCounter(li_qp,TRUE); + dwResRdtsc=GetResRdtsc(li_rdtsc,TRUE); + printf("total uncompress size = %u, in %u step\n",lSizeUncpr,step); + printf("time = %u msec = %f sec\n",dwGetTick,dwGetTick/(double)1000.); + printf("uncpr time QP = %u msec = %f sec\n",dwMsecQP,dwMsecQP/(double)1000.); + printf("uncpr result rdtsc = %I64x\n\n",dwResRdtsc.QuadPart); + } + + if (lSizeUncpr==lFileSize) + { + if (memcmp(FilePtr,UncprPtr,lFileSize)==0) + printf("compare ok\n"); + + } + + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/contrib/untgz/untgz.c b/c-blosc/internal-complibs/zlib-1.3.1/contrib/untgz/untgz.c new file mode 100644 index 0000000..7857921 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/contrib/untgz/untgz.c @@ -0,0 +1,667 @@ +/* + * untgz.c -- Display contents and extract files from a gzip'd TAR file + * + * written by Pedro A. Aranda Gutierrez + * adaptation to Unix by Jean-loup Gailly + * various fixes by Cosmin Truta + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+
+#include "zlib.h"
+
+#ifdef _WIN32
+# include <windows.h>
+# include <io.h>
+# include <direct.h>
+# ifndef F_OK
+#  define F_OK  0
+# endif
+# define mkdir(dirname,mode)   _mkdir(dirname)
+# ifdef _MSC_VER
+#  define access(path,mode)    _access(path,mode)
+#  define chmod(path,mode)     _chmod(path,mode)
+#  define strdup(str)          _strdup(str)
+# endif
+#else
+# include <sys/stat.h>
+# include <unistd.h>
+# include <utime.h>
+#endif
+
+
+/* values used in typeflag field */
+
+#define REGTYPE  '0'            /* regular file */
+#define AREGTYPE '\0'           /* regular file */
+#define LNKTYPE  '1'            /* link */
+#define SYMTYPE  '2'            /* reserved */
+#define CHRTYPE  '3'            /* character special */
+#define BLKTYPE  '4'            /* block special */
+#define DIRTYPE  '5'            /* directory */
+#define FIFOTYPE '6'            /* FIFO special */
+#define CONTTYPE '7'            /* reserved */
+
+/* GNU tar extensions */
+
+#define GNUTYPE_DUMPDIR  'D'    /* file names from dumped directory */
+#define GNUTYPE_LONGLINK 'K'    /* long link name */
+#define GNUTYPE_LONGNAME 'L'    /* long file name */
+#define GNUTYPE_MULTIVOL 'M'    /* continuation of file from another volume */
+#define GNUTYPE_NAMES    'N'    /* file name that does not fit into main hdr */
+#define GNUTYPE_SPARSE   'S'    /* sparse file */
+#define GNUTYPE_VOLHDR   'V'    /* tape/volume header */
+
+
+/* tar header */
+
+#define BLOCKSIZE     512
+#define SHORTNAMESIZE 100
+
+struct tar_header
+{                               /* byte offset */
+  char name[100];               /*   0 */
+  char mode[8];                 /* 100 */
+  char uid[8];                  /* 108 */
+  char gid[8];                  /* 116 */
+  char size[12];                /* 124 */
+  char mtime[12];               /* 136 */
+  char chksum[8];               /* 148 */
+  char typeflag;                /* 156 */
+  char linkname[100];           /* 157 */
+  char magic[6];                /* 257 */
+  char version[2];              /* 263 */
+  char uname[32];               /* 265 */
+  char gname[32];               /* 297 */
+  char devmajor[8];             /* 329 */
+  char devminor[8];             /* 337 */
+  char prefix[155];             /* 345 */
+                                /* 500 */
+};
+
+union tar_buffer
+{
+  char               buffer[BLOCKSIZE];
+  struct tar_header  header;
+};
+
+struct attr_item
+{
+  struct attr_item  *next;
+  char              *fname;
+  int                mode;
+  time_t             time;
+};
+
+enum { TGZ_EXTRACT, TGZ_LIST, TGZ_INVALID };
+
+char *prog;
+
+void error(const char *msg)
+{
+  fprintf(stderr, "%s: %s\n", prog, msg);
+  exit(1);
+}
+
+const char *TGZsuffix[] = { "\0", ".tar", ".tar.gz", ".taz", ".tgz", NULL };
+
+/* return the file name of the TGZ archive */
+/* or NULL if it does not exist */
+
+char *TGZfname (const char *arcname)
+{
+  static char buffer[1024];
+  int origlen,i;
+
+  strcpy(buffer,arcname);
+  origlen = strlen(buffer);
+
+  for (i=0; TGZsuffix[i]; i++)
+    {
+      strcpy(buffer+origlen,TGZsuffix[i]);
+      if (access(buffer,F_OK) == 0)
+        return buffer;
+    }
+  return NULL;
+}
+
+
+/* error message for the filename */
+
+void TGZnotfound (const char *arcname)
+{
+  int i;
+
+  fprintf(stderr,"%s: Couldn't find ",prog);
+  for (i=0;TGZsuffix[i];i++)
+    fprintf(stderr,(TGZsuffix[i+1]) ?
"%s%s, " : "or %s%s\n", + arcname, + TGZsuffix[i]); + exit(1); +} + + +/* convert octal digits to int */ +/* on error return -1 */ + +int getoct (char *p,int width) +{ + int result = 0; + char c; + + while (width--) + { + c = *p++; + if (c == 0) + break; + if (c == ' ') + continue; + if (c < '0' || c > '7') + return -1; + result = result * 8 + (c - '0'); + } + return result; +} + + +/* convert time_t to string */ +/* use the "YYYY/MM/DD hh:mm:ss" format */ + +char *strtime (time_t *t) +{ + struct tm *local; + static char result[32]; + + local = localtime(t); + sprintf(result,"%4d/%02d/%02d %02d:%02d:%02d", + local->tm_year+1900, local->tm_mon+1, local->tm_mday, + local->tm_hour, local->tm_min, local->tm_sec); + return result; +} + + +/* set file time */ + +int setfiletime (char *fname,time_t ftime) +{ +#ifdef _WIN32 + static int isWinNT = -1; + SYSTEMTIME st; + FILETIME locft, modft; + struct tm *loctm; + HANDLE hFile; + int result; + + loctm = localtime(&ftime); + if (loctm == NULL) + return -1; + + st.wYear = (WORD)loctm->tm_year + 1900; + st.wMonth = (WORD)loctm->tm_mon + 1; + st.wDayOfWeek = (WORD)loctm->tm_wday; + st.wDay = (WORD)loctm->tm_mday; + st.wHour = (WORD)loctm->tm_hour; + st.wMinute = (WORD)loctm->tm_min; + st.wSecond = (WORD)loctm->tm_sec; + st.wMilliseconds = 0; + if (!SystemTimeToFileTime(&st, &locft) || + !LocalFileTimeToFileTime(&locft, &modft)) + return -1; + + if (isWinNT < 0) + isWinNT = (GetVersion() < 0x80000000) ? 1 : 0; + hFile = CreateFile(fname, GENERIC_WRITE, 0, NULL, OPEN_EXISTING, + (isWinNT ? FILE_FLAG_BACKUP_SEMANTICS : 0), + NULL); + if (hFile == INVALID_HANDLE_VALUE) + return -1; + result = SetFileTime(hFile, NULL, NULL, &modft) ? 0 : -1; + CloseHandle(hFile); + return result; +#else + struct utimbuf settime; + + settime.actime = settime.modtime = ftime; + return utime(fname,&settime); +#endif +} + + +/* push file attributes */ + +void push_attr(struct attr_item **list,char *fname,int mode,time_t time) +{ + struct attr_item *item; + + item = (struct attr_item *)malloc(sizeof(struct attr_item)); + if (item == NULL) + error("Out of memory"); + item->fname = strdup(fname); + item->mode = mode; + item->time = time; + item->next = *list; + *list = item; +} + + +/* restore file attributes */ + +void restore_attr(struct attr_item **list) +{ + struct attr_item *item, *prev; + + for (item = *list; item != NULL; ) + { + setfiletime(item->fname,item->time); + chmod(item->fname,item->mode); + prev = item; + item = item->next; + free(prev); + } + *list = NULL; +} + + +/* match regular expression */ + +#define ISSPECIAL(c) (((c) == '*') || ((c) == '/')) + +int ExprMatch (char *string,char *expr) +{ + while (1) + { + if (ISSPECIAL(*expr)) + { + if (*expr == '/') + { + if (*string != '\\' && *string != '/') + return 0; + string ++; expr++; + } + else if (*expr == '*') + { + if (*expr ++ == 0) + return 1; + while (*++string != *expr) + if (*string == 0) + return 0; + } + } + else + { + if (*string != *expr) + return 0; + if (*expr++ == 0) + return 1; + string++; + } + } +} + + +/* recursive mkdir */ +/* abort on ENOENT; ignore other errors like "directory already exists" */ +/* return 1 if OK */ +/* 0 on error */ + +int makedir (char *newdir) +{ + char *buffer = strdup(newdir); + char *p; + int len = strlen(buffer); + + if (len <= 0) { + free(buffer); + return 0; + } + if (buffer[len-1] == '/') { + buffer[len-1] = '\0'; + } + if (mkdir(buffer, 0755) == 0) + { + free(buffer); + return 1; + } + + p = buffer+1; + while (1) + { + char hold; + + while(*p && *p != '\\' && *p 
!= '/') + p++; + hold = *p; + *p = 0; + if ((mkdir(buffer, 0755) == -1) && (errno == ENOENT)) + { + fprintf(stderr,"%s: Couldn't create directory %s\n",prog,buffer); + free(buffer); + return 0; + } + if (hold == 0) + break; + *p++ = hold; + } + free(buffer); + return 1; +} + + +int matchname (int arg,int argc,char **argv,char *fname) +{ + if (arg == argc) /* no arguments given (untgz tgzarchive) */ + return 1; + + while (arg < argc) + if (ExprMatch(fname,argv[arg++])) + return 1; + + return 0; /* ignore this for the moment being */ +} + + +/* tar file list or extract */ + +int tar (gzFile in,int action,int arg,int argc,char **argv) +{ + union tar_buffer buffer; + int len; + int err; + int getheader = 1; + int remaining = 0; + FILE *outfile = NULL; + char fname[BLOCKSIZE]; + int tarmode; + time_t tartime; + struct attr_item *attributes = NULL; + + if (action == TGZ_LIST) + printf(" date time size file\n" + " ---------- -------- --------- -------------------------------------\n"); + while (1) + { + len = gzread(in, &buffer, BLOCKSIZE); + if (len < 0) + error(gzerror(in, &err)); + /* + * Always expect complete blocks to process + * the tar information. + */ + if (len != BLOCKSIZE) + { + action = TGZ_INVALID; /* force error exit */ + remaining = 0; /* force I/O cleanup */ + } + + /* + * If we have to get a tar header + */ + if (getheader >= 1) + { + /* + * if we met the end of the tar + * or the end-of-tar block, + * we are done + */ + if (len == 0 || buffer.header.name[0] == 0) + break; + + tarmode = getoct(buffer.header.mode,8); + tartime = (time_t)getoct(buffer.header.mtime,12); + if (tarmode == -1 || tartime == (time_t)-1) + { + buffer.header.name[0] = 0; + action = TGZ_INVALID; + } + + if (getheader == 1) + { + strncpy(fname,buffer.header.name,SHORTNAMESIZE); + if (fname[SHORTNAMESIZE-1] != 0) + fname[SHORTNAMESIZE] = 0; + } + else + { + /* + * The file name is longer than SHORTNAMESIZE + */ + if (strncmp(fname,buffer.header.name,SHORTNAMESIZE-1) != 0) + error("bad long name"); + getheader = 1; + } + + /* + * Act according to the type flag + */ + switch (buffer.header.typeflag) + { + case DIRTYPE: + if (action == TGZ_LIST) + printf(" %s %s\n",strtime(&tartime),fname); + if (action == TGZ_EXTRACT) + { + makedir(fname); + push_attr(&attributes,fname,tarmode,tartime); + } + break; + case REGTYPE: + case AREGTYPE: + remaining = getoct(buffer.header.size,12); + if (remaining == -1) + { + action = TGZ_INVALID; + break; + } + if (action == TGZ_LIST) + printf(" %s %9d %s\n",strtime(&tartime),remaining,fname); + else if (action == TGZ_EXTRACT) + { + if (matchname(arg,argc,argv,fname)) + { + outfile = fopen(fname,"wb"); + if (outfile == NULL) { + /* try creating directory */ + char *p = strrchr(fname, '/'); + if (p != NULL) { + *p = '\0'; + makedir(fname); + *p = '/'; + outfile = fopen(fname,"wb"); + } + } + if (outfile != NULL) + printf("Extracting %s\n",fname); + else + fprintf(stderr, "%s: Couldn't create %s",prog,fname); + } + else + outfile = NULL; + } + getheader = 0; + break; + case GNUTYPE_LONGLINK: + case GNUTYPE_LONGNAME: + remaining = getoct(buffer.header.size,12); + if (remaining < 0 || remaining >= BLOCKSIZE) + { + action = TGZ_INVALID; + break; + } + len = gzread(in, fname, BLOCKSIZE); + if (len < 0) + error(gzerror(in, &err)); + if (fname[BLOCKSIZE-1] != 0 || (int)strlen(fname) > remaining) + { + action = TGZ_INVALID; + break; + } + getheader = 2; + break; + default: + if (action == TGZ_LIST) + printf(" %s <---> %s\n",strtime(&tartime),fname); + break; + } + } + else + { + unsigned 
int bytes = (remaining > BLOCKSIZE) ? BLOCKSIZE : remaining; + + if (outfile != NULL) + { + if (fwrite(&buffer,sizeof(char),bytes,outfile) != bytes) + { + fprintf(stderr, + "%s: Error writing %s -- skipping\n",prog,fname); + fclose(outfile); + outfile = NULL; + remove(fname); + } + } + remaining -= bytes; + } + + if (remaining == 0) + { + getheader = 1; + if (outfile != NULL) + { + fclose(outfile); + outfile = NULL; + if (action != TGZ_INVALID) + push_attr(&attributes,fname,tarmode,tartime); + } + } + + /* + * Abandon if errors are found + */ + if (action == TGZ_INVALID) + { + error("broken archive"); + break; + } + } + + /* + * Restore file modes and time stamps + */ + restore_attr(&attributes); + + if (gzclose(in) != Z_OK) + error("failed gzclose"); + + return 0; +} + + +/* ============================================================ */ + +void help(int exitval) +{ + printf("untgz version 0.2.1\n" + " using zlib version %s\n\n", + zlibVersion()); + printf("Usage: untgz file.tgz extract all files\n" + " untgz file.tgz fname ... extract selected files\n" + " untgz -l file.tgz list archive contents\n" + " untgz -h display this help\n"); + exit(exitval); +} + + +/* ============================================================ */ + +#if defined(WIN32) && defined(__GNUC__) +int _CRT_glob = 0; /* disable argument globbing in MinGW */ +#endif + +int main(int argc,char **argv) +{ + int action = TGZ_EXTRACT; + int arg = 1; + char *TGZfile; + gzFile f; + + prog = strrchr(argv[0],'\\'); + if (prog == NULL) + { + prog = strrchr(argv[0],'/'); + if (prog == NULL) + { + prog = strrchr(argv[0],':'); + if (prog == NULL) + prog = argv[0]; + else + prog++; + } + else + prog++; + } + else + prog++; + + if (argc == 1) + help(0); + + if (strcmp(argv[arg],"-l") == 0) + { + action = TGZ_LIST; + if (argc == ++arg) + help(0); + } + else if (strcmp(argv[arg],"-h") == 0) + { + help(0); + } + + if ((TGZfile = TGZfname(argv[arg])) == NULL) + TGZnotfound(argv[arg]); + + ++arg; + if ((action == TGZ_LIST) && (arg != argc)) + help(1); + +/* + * Process the TGZ file + */ + switch(action) + { + case TGZ_LIST: + case TGZ_EXTRACT: + f = gzopen(TGZfile,"rb"); + if (f == NULL) + { + fprintf(stderr,"%s: Couldn't gzopen %s\n",prog,TGZfile); + return 1; + } + exit(tar(f, action, arg, argc, argv)); + break; + + default: + error("Unknown option"); + exit(1); + } + + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/crc32.c b/c-blosc/internal-complibs/zlib-1.3.1/crc32.c new file mode 100644 index 0000000..6c38f5c --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/crc32.c @@ -0,0 +1,1049 @@ +/* crc32.c -- compute the CRC-32 of a data stream + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * + * This interleaved implementation of a CRC makes use of pipelined multiple + * arithmetic-logic units, commonly found in modern CPU cores. It is due to + * Kadatch and Jenkins (2010). See doc/crc-doc.1.0.pdf in this distribution. + */ + +/* @(#) $Id$ */ + +/* + Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore + protection on the static variables used to control the first-use generation + of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should + first call get_crc_table() to initialize the tables before allowing more than + one thread to use crc32(). + + MAKECRCH can be #defined to write out crc32.h. A main() routine is also + produced, so that this one source file can be compiled to an executable. 
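 */
+
+#if 0
+/* Editor's illustrative sketch, not part of the zlib sources: the
+   initialization order described above. With DYNAMIC_CRC_TABLE, call
+   get_crc_table() once before starting any threads; after that, crc32()
+   may be used concurrently. The names buf and len are placeholders. */
+#include "zlib.h"
+unsigned long checksum(const unsigned char *buf, unsigned len) {
+    unsigned long crc;
+    (void)get_crc_table();       /* force one-time table generation */
+    crc = crc32(0L, Z_NULL, 0);  /* the initial CRC value */
+    return crc32(crc, buf, len); /* update the CRC with the data */
+}
+#endif
+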
+#ifdef MAKECRCH
+#  include <stdio.h>
+#  ifndef DYNAMIC_CRC_TABLE
+#    define DYNAMIC_CRC_TABLE
+#  endif /* !DYNAMIC_CRC_TABLE */
+#endif /* MAKECRCH */
+
+#include "zutil.h"      /* for Z_U4, Z_U8, z_crc_t, and FAR definitions */
+
+ /*
+  A CRC of a message is computed on N braids of words in the message, where
+  each word consists of W bytes (4 or 8). If N is 3, for example, then three
+  running sparse CRCs are calculated respectively on each braid, at these
+  indices in the array of words: 0, 3, 6, ..., 1, 4, 7, ..., and 2, 5, 8, ...
+  This is done starting at a word boundary, and continues until as many blocks
+  of N * W bytes as are available have been processed. The results are combined
+  into a single CRC at the end. For this code, N must be in the range 1..6 and
+  W must be 4 or 8. The upper limit on N can be increased if desired by adding
+  more #if blocks, extending the patterns apparent in the code. In addition,
+  crc32.h would need to be regenerated, if the maximum N value is increased.
+
+  N and W are chosen empirically by benchmarking the execution time on a given
+  processor. The choices for N and W below were based on testing on Intel Kaby
+  Lake i7, AMD Ryzen 7, ARM Cortex-A57, Sparc64-VII, PowerPC POWER9, and MIPS64
+  Octeon II processors. The Intel, AMD, and ARM processors were all fastest
+  with N=5, W=8. The Sparc, PowerPC, and MIPS64 were all fastest at N=5, W=4.
+  They were all tested with either gcc or clang, all using the -O3 optimization
+  level. Your mileage may vary.
+ */
+
+/* Define N */
+#ifdef Z_TESTN
+#  define N Z_TESTN
+#else
+#  define N 5
+#endif
+#if N < 1 || N > 6
+#  error N must be in 1..6
+#endif
+
+/*
+  z_crc_t must be at least 32 bits. z_word_t must be at least as long as
+  z_crc_t. It is assumed here that z_word_t is either 32 bits or 64 bits, and
+  that bytes are eight bits.
+ */
+
+/*
+  Define W and the associated z_word_t type. If W is not defined, then a
+  braided calculation is not used, and the associated tables and code are not
+  compiled.
+ */
+#ifdef Z_TESTW
+#  if Z_TESTW-1 != -1
+#    define W Z_TESTW
+#  endif
+#else
+#  ifdef MAKECRCH
+#    define W 8         /* required for MAKECRCH */
+#  else
+#    if defined(__x86_64__) || defined(__aarch64__)
+#      define W 8
+#    else
+#      define W 4
+#    endif
+#  endif
+#endif
+#ifdef W
+#  if W == 8 && defined(Z_U8)
+   typedef Z_U8 z_word_t;
+#  elif defined(Z_U4)
+#    undef W
+#    define W 4
+   typedef Z_U4 z_word_t;
+#  else
+#    undef W
+#  endif
+#endif
+
+/* If available, use the ARM processor CRC32 instruction. */
+#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) && W == 8
+#  define ARMCRC32
+#endif
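+
+#if 0
+/* Editor's illustrative sketch, not part of the zlib sources: the braid
+   word-index pattern described above. Braid k covers word indices
+   k, k+N, k+2N, ... of each N*W-byte block, so the N running CRCs have no
+   data dependencies between them and can overlap in the CPU pipeline. */
+#include <stdio.h>
+void show_braids(void) {
+    int k, i;
+    int n = 3;                  /* the example N from the comment above */
+    for (k = 0; k < n; k++) {
+        printf("braid %d:", k);
+        for (i = k; i < 4 * n; i += n)
+            printf(" %d", i);   /* prints 0 3 6 9 / 1 4 7 10 / 2 5 8 11 */
+        printf(" ...\n");
+    }
+}
+#endif
+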
+#if defined(W) && (!defined(ARMCRC32) || defined(DYNAMIC_CRC_TABLE))
+/*
+  Swap the bytes in a z_word_t to convert between little and big endian. Any
+  self-respecting compiler will optimize this to a single machine byte-swap
+  instruction, if one is available. This assumes that word_t is either 32 bits
+  or 64 bits.
+ */
+local z_word_t byte_swap(z_word_t word) {
+#  if W == 8
+    return
+        (word & 0xff00000000000000) >> 56 |
+        (word & 0xff000000000000) >> 40 |
+        (word & 0xff0000000000) >> 24 |
+        (word & 0xff00000000) >> 8 |
+        (word & 0xff000000) << 8 |
+        (word & 0xff0000) << 24 |
+        (word & 0xff00) << 40 |
+        (word & 0xff) << 56;
+#  else /* W == 4 */
+    return
+        (word & 0xff000000) >> 24 |
+        (word & 0xff0000) >> 8 |
+        (word & 0xff00) << 8 |
+        (word & 0xff) << 24;
+#  endif
+}
+#endif
+
+#ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Table of powers of x for combining CRC-32s, filled in by make_crc_table()
+ * below.
+ */
+   local z_crc_t FAR x2n_table[32];
+#else
+/* =========================================================================
+ * Tables for byte-wise and braided CRC-32 calculations, and a table of powers
+ * of x for combining CRC-32s, all made by make_crc_table().
+ */
+#  include "crc32.h"
+#endif
+
+/* CRC polynomial. */
+#define POLY 0xedb88320         /* p(x) reflected, with x^32 implied */
+
+/*
+  Return a(x) multiplied by b(x) modulo p(x), where p(x) is the CRC polynomial,
+  reflected. For speed, this requires that a not be zero.
+ */
+local z_crc_t multmodp(z_crc_t a, z_crc_t b) {
+    z_crc_t m, p;
+
+    m = (z_crc_t)1 << 31;
+    p = 0;
+    for (;;) {
+        if (a & m) {
+            p ^= b;
+            if ((a & (m - 1)) == 0)
+                break;
+        }
+        m >>= 1;
+        b = b & 1 ? (b >> 1) ^ POLY : b >> 1;
+    }
+    return p;
+}
+
+/*
+  Return x^(n * 2^k) modulo p(x). Requires that x2n_table[] has been
+  initialized.
+ */
+local z_crc_t x2nmodp(z_off64_t n, unsigned k) {
+    z_crc_t p;
+
+    p = (z_crc_t)1 << 31;       /* x^0 == 1 */
+    while (n) {
+        if (n & 1)
+            p = multmodp(x2n_table[k & 31], p);
+        n >>= 1;
+        k++;
+    }
+    return p;
+}
+
+#ifdef DYNAMIC_CRC_TABLE
+/* =========================================================================
+ * Build the tables for byte-wise and braided CRC-32 calculations, and a table
+ * of powers of x for combining CRC-32s.
+ */
+local z_crc_t FAR crc_table[256];
+#ifdef W
+   local z_word_t FAR crc_big_table[256];
+   local z_crc_t FAR crc_braid_table[W][256];
+   local z_word_t FAR crc_braid_big_table[W][256];
+   local void braid(z_crc_t [][256], z_word_t [][256], int, int);
+#endif
+#ifdef MAKECRCH
+   local void write_table(FILE *, const z_crc_t FAR *, int);
+   local void write_table32hi(FILE *, const z_word_t FAR *, int);
+   local void write_table64(FILE *, const z_word_t FAR *, int);
+#endif /* MAKECRCH */
+
+/*
+  Define a once() function depending on the availability of atomics. If this is
+  compiled with DYNAMIC_CRC_TABLE defined, and if CRCs will be computed in
+  multiple threads, and if atomics are not available, then get_crc_table() must
+  be called to initialize the tables and must return before any threads are
+  allowed to compute or combine CRCs.
+ */
+
+/* Definition of once functionality. */
+typedef struct once_s once_t;
+
+/* Check for the availability of atomics. */
+#if defined(__STDC__) && __STDC_VERSION__ >= 201112L && \
+    !defined(__STDC_NO_ATOMICS__)
+
+#include <stdatomic.h>
+
+/* Structure for once(), which must be initialized with ONCE_INIT. */
+struct once_s {
+    atomic_flag begun;
+    atomic_int done;
+};
+#define ONCE_INIT {ATOMIC_FLAG_INIT, 0}
+
+/*
+  Run the provided init() function exactly once, even if multiple threads
+  invoke once() at the same time. The state must be a once_t initialized with
+  ONCE_INIT.
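+ */
+
+#if 0
+/* Editor's illustrative sketch, not part of the zlib sources: the usage
+   pattern for once(). get_crc_table() below guards make_crc_table() in
+   exactly this way; the names tables_once, fill_tables and use_tables are
+   hypothetical. */
+local once_t tables_once = ONCE_INIT;
+local void fill_tables(void);
+local void use_tables(void) {
+    once(&tables_once, fill_tables); /* fill_tables() runs exactly once */
+    /* every caller returns from once() with the tables ready */
+}
+#endif
+
+/* The once() implementation follows.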
+ */ +local void once(once_t *state, void (*init)(void)) { + if (!atomic_load(&state->done)) { + if (atomic_flag_test_and_set(&state->begun)) + while (!atomic_load(&state->done)) + ; + else { + init(); + atomic_store(&state->done, 1); + } + } +} + +#else /* no atomics */ + +/* Structure for once(), which must be initialized with ONCE_INIT. */ +struct once_s { + volatile int begun; + volatile int done; +}; +#define ONCE_INIT {0, 0} + +/* Test and set. Alas, not atomic, but tries to minimize the period of + vulnerability. */ +local int test_and_set(int volatile *flag) { + int was; + + was = *flag; + *flag = 1; + return was; +} + +/* Run the provided init() function once. This is not thread-safe. */ +local void once(once_t *state, void (*init)(void)) { + if (!state->done) { + if (test_and_set(&state->begun)) + while (!state->done) + ; + else { + init(); + state->done = 1; + } + } +} + +#endif + +/* State for once(). */ +local once_t made = ONCE_INIT; + +/* + Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: + x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. + + Polynomials over GF(2) are represented in binary, one bit per coefficient, + with the lowest powers in the most significant bit. Then adding polynomials + is just exclusive-or, and multiplying a polynomial by x is a right shift by + one. If we call the above polynomial p, and represent a byte as the + polynomial q, also with the lowest power in the most significant bit (so the + byte 0xb1 is the polynomial x^7+x^3+x^2+1), then the CRC is (q*x^32) mod p, + where a mod b means the remainder after dividing a by b. + + This calculation is done using the shift-register method of multiplying and + taking the remainder. The register is initialized to zero, and for each + incoming bit, x^32 is added mod p to the register if the bit is a one (where + x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by x + (which is shifting right by one and adding x^32 mod p if the bit shifted out + is a one). We start with the highest power (least significant bit) of q and + repeat for all eight bits of q. + + The table is simply the CRC of all possible eight bit values. This is all the + information needed to generate CRCs on data a byte at a time for all + combinations of CRC register values and incoming bytes. + */ + +local void make_crc_table(void) { + unsigned i, j, n; + z_crc_t p; + + /* initialize the CRC of bytes tables */ + for (i = 0; i < 256; i++) { + p = i; + for (j = 0; j < 8; j++) + p = p & 1 ? (p >> 1) ^ POLY : p >> 1; + crc_table[i] = p; +#ifdef W + crc_big_table[i] = byte_swap(p); +#endif + } + + /* initialize the x^2^n mod p(x) table */ + p = (z_crc_t)1 << 30; /* x^1 */ + x2n_table[0] = p; + for (n = 1; n < 32; n++) + x2n_table[n] = p = multmodp(p, p); + +#ifdef W + /* initialize the braiding tables -- needs x2n_table[] */ + braid(crc_braid_table, crc_braid_big_table, N, W); +#endif + +#ifdef MAKECRCH + { + /* + The crc32.h header file contains tables for both 32-bit and 64-bit + z_word_t's, and so requires a 64-bit type be available. In that case, + z_word_t must be defined to be 64-bits. This code then also generates + and writes out the tables for the case that z_word_t is 32 bits. + */ +#if !defined(W) || W != 8 +# error Need a 64-bit integer type in order to generate crc32.h. 
+#endif + FILE *out; + int k, n; + z_crc_t ltl[8][256]; + z_word_t big[8][256]; + + out = fopen("crc32.h", "w"); + if (out == NULL) return; + + /* write out little-endian CRC table to crc32.h */ + fprintf(out, + "/* crc32.h -- tables for rapid CRC calculation\n" + " * Generated automatically by crc32.c\n */\n" + "\n" + "local const z_crc_t FAR crc_table[] = {\n" + " "); + write_table(out, crc_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#ifdef W\n" + "\n" + "#if W == 8\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table64(out, crc_big_table, 256); + fprintf(out, + "};\n"); + + /* write out big-endian CRC table for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_word_t FAR crc_big_table[] = {\n" + " "); + write_table32hi(out, crc_big_table, 256); + fprintf(out, + "};\n" + "\n" + "#endif\n"); + + /* write out braid tables for each value of N */ + for (n = 1; n <= 6; n++) { + fprintf(out, + "\n" + "#if N == %d\n", n); + + /* compute braid tables for this N and 64-bit word_t */ + braid(ltl, big, n, 8); + + /* write out braid tables for 64-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#if W == 8\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 8; k++) { + fprintf(out, " {"); + write_table64(out, big[k], 256); + fprintf(out, "}%s", k < 7 ? ",\n" : ""); + } + fprintf(out, + "};\n"); + + /* compute braid tables for this N and 32-bit word_t */ + braid(ltl, big, n, 4); + + /* write out braid tables for 32-bit z_word_t to crc32.h */ + fprintf(out, + "\n" + "#else /* W == 4 */\n" + "\n" + "local const z_crc_t FAR crc_braid_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table(out, ltl[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "local const z_word_t FAR crc_braid_big_table[][256] = {\n"); + for (k = 0; k < 4; k++) { + fprintf(out, " {"); + write_table32hi(out, big[k], 256); + fprintf(out, "}%s", k < 3 ? ",\n" : ""); + } + fprintf(out, + "};\n" + "\n" + "#endif\n" + "\n" + "#endif\n"); + } + fprintf(out, + "\n" + "#endif\n"); + + /* write out zeros operator table to crc32.h */ + fprintf(out, + "\n" + "local const z_crc_t FAR x2n_table[] = {\n" + " "); + write_table(out, x2n_table, 32); + fprintf(out, + "};\n"); + fclose(out); + } +#endif /* MAKECRCH */ +} + +#ifdef MAKECRCH + +/* + Write the 32-bit values in table[0..k-1] to out, five per line in + hexadecimal separated by commas. + */ +local void write_table(FILE *out, const z_crc_t FAR *table, int k) { + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n]), + n == k - 1 ? "" : (n % 5 == 4 ? ",\n" : ", ")); +} + +/* + Write the high 32-bits of each value in table[0..k-1] to out, five per line + in hexadecimal separated by commas. + */ +local void write_table32hi(FILE *out, const z_word_t FAR *table, int k) { + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%08lx%s", n == 0 || n % 5 ? "" : " ", + (unsigned long)(table[n] >> 32), + n == k - 1 ? "" : (n % 5 == 4 ? 
",\n" : ", ")); +} + +/* + Write the 64-bit values in table[0..k-1] to out, three per line in + hexadecimal separated by commas. This assumes that if there is a 64-bit + type, then there is also a long long integer type, and it is at least 64 + bits. If not, then the type cast and format string can be adjusted + accordingly. + */ +local void write_table64(FILE *out, const z_word_t FAR *table, int k) { + int n; + + for (n = 0; n < k; n++) + fprintf(out, "%s0x%016llx%s", n == 0 || n % 3 ? "" : " ", + (unsigned long long)(table[n]), + n == k - 1 ? "" : (n % 3 == 2 ? ",\n" : ", ")); +} + +/* Actually do the deed. */ +int main(void) { + make_crc_table(); + return 0; +} + +#endif /* MAKECRCH */ + +#ifdef W +/* + Generate the little and big-endian braid tables for the given n and z_word_t + size w. Each array must have room for w blocks of 256 elements. + */ +local void braid(z_crc_t ltl[][256], z_word_t big[][256], int n, int w) { + int k; + z_crc_t i, p, q; + for (k = 0; k < w; k++) { + p = x2nmodp((n * w + 3 - k) << 3, 0); + ltl[k][0] = 0; + big[w - 1 - k][0] = 0; + for (i = 1; i < 256; i++) { + ltl[k][i] = q = multmodp(i << 24, p); + big[w - 1 - k][i] = byte_swap(q); + } + } +} +#endif + +#endif /* DYNAMIC_CRC_TABLE */ + +/* ========================================================================= + * This function can be used by asm versions of crc32(), and to force the + * generation of the CRC tables in a threaded application. + */ +const z_crc_t FAR * ZEXPORT get_crc_table(void) { +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return (const z_crc_t FAR *)crc_table; +} + +/* ========================================================================= + * Use ARM machine instructions if available. This will compute the CRC about + * ten times faster than the braided calculation. This code does not check for + * the presence of the CRC instruction at run time. __ARM_FEATURE_CRC32 will + * only be defined if the compilation specifies an ARM processor architecture + * that has the instructions. For example, compiling with -march=armv8.1-a or + * -march=armv8-a+crc, or -march=native if the compile machine has the crc32 + * instructions. + */ +#ifdef ARMCRC32 + +/* + Constants empirically determined to maximize speed. These values are from + measurements on a Cortex-A57. Your mileage may vary. + */ +#define Z_BATCH 3990 /* number of words in a batch */ +#define Z_BATCH_ZEROS 0xa10d3d0c /* computed from Z_BATCH = 3990 */ +#define Z_BATCH_MIN 800 /* fewest words in a final batch */ + +unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, + z_size_t len) { + z_crc_t val; + z_word_t crc1, crc2; + const z_word_t *word; + z_word_t val0, val1, val2; + z_size_t last, last2, i; + z_size_t num; + + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; + +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + + /* Pre-condition the CRC */ + crc = (~crc) & 0xffffffff; + + /* Compute the CRC up to a word boundary. */ + while (len && ((z_size_t)buf & 7) != 0) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Prepare to compute the CRC on full 64-bit words word[0..num-1]. */ + word = (z_word_t const *)buf; + num = len >> 3; + len &= 7; + + /* Do three interleaved CRCs to realize the throughput of one crc32x + instruction per cycle. Each CRC is calculated on Z_BATCH words. 
The + three CRCs are combined into a single CRC after each set of batches. */ + while (num >= 3 * Z_BATCH) { + crc1 = 0; + crc2 = 0; + for (i = 0; i < Z_BATCH; i++) { + val0 = word[i]; + val1 = word[i + Z_BATCH]; + val2 = word[i + 2 * Z_BATCH]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * Z_BATCH; + num -= 3 * Z_BATCH; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc1; + crc = multmodp(Z_BATCH_ZEROS, crc) ^ crc2; + } + + /* Do one last smaller batch with the remaining words, if there are enough + to pay for the combination of CRCs. */ + last = num / 3; + if (last >= Z_BATCH_MIN) { + last2 = last << 1; + crc1 = 0; + crc2 = 0; + for (i = 0; i < last; i++) { + val0 = word[i]; + val1 = word[i + last]; + val2 = word[i + last2]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc1) : "r"(val1)); + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc2) : "r"(val2)); + } + word += 3 * last; + num -= 3 * last; + val = x2nmodp(last, 6); + crc = multmodp(val, crc) ^ crc1; + crc = multmodp(val, crc) ^ crc2; + } + + /* Compute the CRC on any remaining words. */ + for (i = 0; i < num; i++) { + val0 = word[i]; + __asm__ volatile("crc32x %w0, %w0, %x1" : "+r"(crc) : "r"(val0)); + } + word += num; + + /* Complete the CRC on any remaining bytes. */ + buf = (const unsigned char FAR *)word; + while (len) { + len--; + val = *buf++; + __asm__ volatile("crc32b %w0, %w0, %w1" : "+r"(crc) : "r"(val)); + } + + /* Return the CRC, post-conditioned. */ + return crc ^ 0xffffffff; +} + +#else + +#ifdef W + +/* + Return the CRC of the W bytes in the word_t data, taking the + least-significant byte of the word as the first byte of data, without any pre + or post conditioning. This is used to combine the CRCs of each braid. + */ +local z_crc_t crc_word(z_word_t data) { + int k; + for (k = 0; k < W; k++) + data = (data >> 8) ^ crc_table[data & 0xff]; + return (z_crc_t)data; +} + +local z_word_t crc_word_big(z_word_t data) { + int k; + for (k = 0; k < W; k++) + data = (data << 8) ^ + crc_big_table[(data >> ((W - 1) << 3)) & 0xff]; + return data; +} + +#endif + +/* ========================================================================= */ +unsigned long ZEXPORT crc32_z(unsigned long crc, const unsigned char FAR *buf, + z_size_t len) { + /* Return initial CRC, if requested. */ + if (buf == Z_NULL) return 0; + +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + + /* Pre-condition the CRC */ + crc = (~crc) & 0xffffffff; + +#ifdef W + + /* If provided enough bytes, do a braided CRC calculation. */ + if (len >= N * W + W - 1) { + z_size_t blks; + z_word_t const *words; + unsigned endian; + int k; + + /* Compute the CRC up to a z_word_t boundary. */ + while (len && ((z_size_t)buf & (W - 1)) != 0) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Compute the CRC on as many N z_word_t blocks as are available. */ + blks = len / (N * W); + len -= blks * N * W; + words = (z_word_t const *)buf; + + /* Do endian check at execution time instead of compile time, since ARM + processors can change the endianness at execution time. If the + compiler knows what the endianness will be, it can optimize out the + check and the unused branch. */ + endian = 1; + if (*(unsigned char *)&endian) { + /* Little endian. 
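 */
+#if 0
+/* Editor's illustrative sketch, not part of the zlib sources: each braid
+   below is the classic byte-at-a-time reflected-CRC update applied with one
+   table per byte lane; on a plain byte stream that update is simply: */
+            while (len--)
+                crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff];
+/* The braided loop computes the same CRC while consuming N*W bytes per
+   iteration. */
+#endif
+/* Little-endian braid state follows.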
*/ + + z_crc_t crc0; + z_word_t word0; +#if N > 1 + z_crc_t crc1; + z_word_t word1; +#if N > 2 + z_crc_t crc2; + z_word_t word2; +#if N > 3 + z_crc_t crc3; + z_word_t word3; +#if N > 4 + z_crc_t crc4; + z_word_t word4; +#if N > 5 + z_crc_t crc5; + z_word_t word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = crc; +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. */ + crc0 = crc_braid_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + crc = crc_word(crc0 ^ words[0]); +#if N > 1 + crc = crc_word(crc1 ^ words[1] ^ crc); +#if N > 2 + crc = crc_word(crc2 ^ words[2] ^ crc); +#if N > 3 + crc = crc_word(crc3 ^ words[3] ^ crc); +#if N > 4 + crc = crc_word(crc4 ^ words[4] ^ crc); +#if N > 5 + crc = crc_word(crc5 ^ words[5] ^ crc); +#endif +#endif +#endif +#endif +#endif + words += N; + } + else { + /* Big endian. */ + + z_word_t crc0, word0, comb; +#if N > 1 + z_word_t crc1, word1; +#if N > 2 + z_word_t crc2, word2; +#if N > 3 + z_word_t crc3, word3; +#if N > 4 + z_word_t crc4, word4; +#if N > 5 + z_word_t crc5, word5; +#endif +#endif +#endif +#endif +#endif + + /* Initialize the CRC for each braid. */ + crc0 = byte_swap(crc); +#if N > 1 + crc1 = 0; +#if N > 2 + crc2 = 0; +#if N > 3 + crc3 = 0; +#if N > 4 + crc4 = 0; +#if N > 5 + crc5 = 0; +#endif +#endif +#endif +#endif +#endif + + /* + Process the first blks-1 blocks, computing the CRCs on each braid + independently. + */ + while (--blks) { + /* Load the word for each braid into registers. */ + word0 = crc0 ^ words[0]; +#if N > 1 + word1 = crc1 ^ words[1]; +#if N > 2 + word2 = crc2 ^ words[2]; +#if N > 3 + word3 = crc3 ^ words[3]; +#if N > 4 + word4 = crc4 ^ words[4]; +#if N > 5 + word5 = crc5 ^ words[5]; +#endif +#endif +#endif +#endif +#endif + words += N; + + /* Compute and update the CRC for each word. The loop should + get unrolled. 
*/ + crc0 = crc_braid_big_table[0][word0 & 0xff]; +#if N > 1 + crc1 = crc_braid_big_table[0][word1 & 0xff]; +#if N > 2 + crc2 = crc_braid_big_table[0][word2 & 0xff]; +#if N > 3 + crc3 = crc_braid_big_table[0][word3 & 0xff]; +#if N > 4 + crc4 = crc_braid_big_table[0][word4 & 0xff]; +#if N > 5 + crc5 = crc_braid_big_table[0][word5 & 0xff]; +#endif +#endif +#endif +#endif +#endif + for (k = 1; k < W; k++) { + crc0 ^= crc_braid_big_table[k][(word0 >> (k << 3)) & 0xff]; +#if N > 1 + crc1 ^= crc_braid_big_table[k][(word1 >> (k << 3)) & 0xff]; +#if N > 2 + crc2 ^= crc_braid_big_table[k][(word2 >> (k << 3)) & 0xff]; +#if N > 3 + crc3 ^= crc_braid_big_table[k][(word3 >> (k << 3)) & 0xff]; +#if N > 4 + crc4 ^= crc_braid_big_table[k][(word4 >> (k << 3)) & 0xff]; +#if N > 5 + crc5 ^= crc_braid_big_table[k][(word5 >> (k << 3)) & 0xff]; +#endif +#endif +#endif +#endif +#endif + } + } + + /* + Process the last block, combining the CRCs of the N braids at the + same time. + */ + comb = crc_word_big(crc0 ^ words[0]); +#if N > 1 + comb = crc_word_big(crc1 ^ words[1] ^ comb); +#if N > 2 + comb = crc_word_big(crc2 ^ words[2] ^ comb); +#if N > 3 + comb = crc_word_big(crc3 ^ words[3] ^ comb); +#if N > 4 + comb = crc_word_big(crc4 ^ words[4] ^ comb); +#if N > 5 + comb = crc_word_big(crc5 ^ words[5] ^ comb); +#endif +#endif +#endif +#endif +#endif + words += N; + crc = byte_swap(comb); + } + + /* + Update the pointer to the remaining bytes to process. + */ + buf = (unsigned char const *)words; + } + +#endif /* W */ + + /* Complete the computation of the CRC on any remaining bytes. */ + while (len >= 8) { + len -= 8; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + while (len) { + len--; + crc = (crc >> 8) ^ crc_table[(crc ^ *buf++) & 0xff]; + } + + /* Return the CRC, post-conditioned. 
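 */
+#if 0
+/* Editor's illustrative sketch, not part of the zlib sources: the combine
+   API defined below merges the CRCs of two concatenated pieces without
+   rereading the data; buf1, len1, buf2 and len2 are placeholders. */
+    uLong crc_a = crc32(crc32(0L, Z_NULL, 0), buf1, len1);
+    uLong crc_b = crc32(crc32(0L, Z_NULL, 0), buf2, len2);
+    uLong crc_ab = crc32_combine(crc_a, crc_b, len2);
+    /* crc_ab equals the CRC-32 of buf1 followed by buf2 */
+#endif
+/* Post-conditioned result: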
*/ + return crc ^ 0xffffffff; +} + +#endif + +/* ========================================================================= */ +unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, + uInt len) { + return crc32_z(crc, buf, len); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2) { +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return multmodp(x2nmodp(len2, 3), crc1) ^ (crc2 & 0xffffffff); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2) { + return crc32_combine64(crc1, crc2, (z_off64_t)len2); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen64(z_off64_t len2) { +#ifdef DYNAMIC_CRC_TABLE + once(&made, make_crc_table); +#endif /* DYNAMIC_CRC_TABLE */ + return x2nmodp(len2, 3); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_gen(z_off_t len2) { + return crc32_combine_gen64((z_off64_t)len2); +} + +/* ========================================================================= */ +uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op) { + return multmodp(op, crc1) ^ (crc2 & 0xffffffff); +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/crc32.h b/c-blosc/internal-complibs/zlib-1.3.1/crc32.h new file mode 100644 index 0000000..137df68 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/crc32.h @@ -0,0 +1,9446 @@ +/* crc32.h -- tables for rapid CRC calculation + * Generated automatically by crc32.c + */ + +local const z_crc_t FAR crc_table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 
0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}; + +#ifdef W + +#if W == 8 + +local const z_word_t FAR crc_big_table[] = { + 0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 
0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}; + +#else /* W == 4 */ + +local const z_word_t FAR crc_big_table[] = { + 0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 
0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}; + +#endif + +#if N == 1 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 
0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 
0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 
0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 
0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}, + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 
0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 
0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 
0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 
0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x9630077700000000, 0x2c610eee00000000, + 0xba51099900000000, 0x19c46d0700000000, 0x8ff46a7000000000, + 0x35a563e900000000, 0xa395649e00000000, 0x3288db0e00000000, + 0xa4b8dc7900000000, 0x1ee9d5e000000000, 0x88d9d29700000000, + 0x2b4cb60900000000, 0xbd7cb17e00000000, 0x072db8e700000000, + 0x911dbf9000000000, 0x6410b71d00000000, 0xf220b06a00000000, + 0x4871b9f300000000, 0xde41be8400000000, 0x7dd4da1a00000000, + 0xebe4dd6d00000000, 0x51b5d4f400000000, 0xc785d38300000000, + 0x56986c1300000000, 0xc0a86b6400000000, 0x7af962fd00000000, + 0xecc9658a00000000, 0x4f5c011400000000, 0xd96c066300000000, + 0x633d0ffa00000000, 0xf50d088d00000000, 0xc8206e3b00000000, + 0x5e10694c00000000, 0xe44160d500000000, 0x727167a200000000, + 0xd1e4033c00000000, 0x47d4044b00000000, 0xfd850dd200000000, + 0x6bb50aa500000000, 0xfaa8b53500000000, 0x6c98b24200000000, + 0xd6c9bbdb00000000, 0x40f9bcac00000000, 0xe36cd83200000000, + 0x755cdf4500000000, 0xcf0dd6dc00000000, 0x593dd1ab00000000, + 0xac30d92600000000, 0x3a00de5100000000, 0x8051d7c800000000, + 0x1661d0bf00000000, 0xb5f4b42100000000, 0x23c4b35600000000, + 0x9995bacf00000000, 0x0fa5bdb800000000, 0x9eb8022800000000, + 0x0888055f00000000, 0xb2d90cc600000000, 0x24e90bb100000000, + 0x877c6f2f00000000, 0x114c685800000000, 0xab1d61c100000000, + 0x3d2d66b600000000, 0x9041dc7600000000, 0x0671db0100000000, + 0xbc20d29800000000, 0x2a10d5ef00000000, 0x8985b17100000000, + 0x1fb5b60600000000, 0xa5e4bf9f00000000, 0x33d4b8e800000000, + 0xa2c9077800000000, 0x34f9000f00000000, 0x8ea8099600000000, + 0x18980ee100000000, 0xbb0d6a7f00000000, 0x2d3d6d0800000000, + 0x976c649100000000, 0x015c63e600000000, 0xf4516b6b00000000, + 0x62616c1c00000000, 0xd830658500000000, 0x4e0062f200000000, + 0xed95066c00000000, 0x7ba5011b00000000, 0xc1f4088200000000, + 0x57c40ff500000000, 0xc6d9b06500000000, 0x50e9b71200000000, + 0xeab8be8b00000000, 0x7c88b9fc00000000, 0xdf1ddd6200000000, + 0x492dda1500000000, 0xf37cd38c00000000, 0x654cd4fb00000000, + 0x5861b24d00000000, 0xce51b53a00000000, 0x7400bca300000000, + 0xe230bbd400000000, 0x41a5df4a00000000, 0xd795d83d00000000, + 0x6dc4d1a400000000, 0xfbf4d6d300000000, 0x6ae9694300000000, + 0xfcd96e3400000000, 0x468867ad00000000, 0xd0b860da00000000, + 0x732d044400000000, 0xe51d033300000000, 0x5f4c0aaa00000000, + 0xc97c0ddd00000000, 0x3c71055000000000, 0xaa41022700000000, + 0x10100bbe00000000, 0x86200cc900000000, 0x25b5685700000000, + 0xb3856f2000000000, 0x09d466b900000000, 0x9fe461ce00000000, + 0x0ef9de5e00000000, 0x98c9d92900000000, 0x2298d0b000000000, + 0xb4a8d7c700000000, 0x173db35900000000, 0x810db42e00000000, + 0x3b5cbdb700000000, 0xad6cbac000000000, 0x2083b8ed00000000, + 0xb6b3bf9a00000000, 0x0ce2b60300000000, 0x9ad2b17400000000, + 0x3947d5ea00000000, 0xaf77d29d00000000, 0x1526db0400000000, + 0x8316dc7300000000, 0x120b63e300000000, 0x843b649400000000, + 0x3e6a6d0d00000000, 0xa85a6a7a00000000, 0x0bcf0ee400000000, + 0x9dff099300000000, 0x27ae000a00000000, 0xb19e077d00000000, + 0x44930ff000000000, 0xd2a3088700000000, 0x68f2011e00000000, + 0xfec2066900000000, 0x5d5762f700000000, 0xcb67658000000000, + 0x71366c1900000000, 0xe7066b6e00000000, 0x761bd4fe00000000, + 0xe02bd38900000000, 0x5a7ada1000000000, 0xcc4add6700000000, + 0x6fdfb9f900000000, 0xf9efbe8e00000000, 0x43beb71700000000, + 0xd58eb06000000000, 0xe8a3d6d600000000, 0x7e93d1a100000000, + 0xc4c2d83800000000, 0x52f2df4f00000000, 0xf167bbd100000000, + 
0x6757bca600000000, 0xdd06b53f00000000, 0x4b36b24800000000, + 0xda2b0dd800000000, 0x4c1b0aaf00000000, 0xf64a033600000000, + 0x607a044100000000, 0xc3ef60df00000000, 0x55df67a800000000, + 0xef8e6e3100000000, 0x79be694600000000, 0x8cb361cb00000000, + 0x1a8366bc00000000, 0xa0d26f2500000000, 0x36e2685200000000, + 0x95770ccc00000000, 0x03470bbb00000000, 0xb916022200000000, + 0x2f26055500000000, 0xbe3bbac500000000, 0x280bbdb200000000, + 0x925ab42b00000000, 0x046ab35c00000000, 0xa7ffd7c200000000, + 0x31cfd0b500000000, 0x8b9ed92c00000000, 0x1daede5b00000000, + 0xb0c2649b00000000, 0x26f263ec00000000, 0x9ca36a7500000000, + 0x0a936d0200000000, 0xa906099c00000000, 0x3f360eeb00000000, + 0x8567077200000000, 0x1357000500000000, 0x824abf9500000000, + 0x147ab8e200000000, 0xae2bb17b00000000, 0x381bb60c00000000, + 0x9b8ed29200000000, 0x0dbed5e500000000, 0xb7efdc7c00000000, + 0x21dfdb0b00000000, 0xd4d2d38600000000, 0x42e2d4f100000000, + 0xf8b3dd6800000000, 0x6e83da1f00000000, 0xcd16be8100000000, + 0x5b26b9f600000000, 0xe177b06f00000000, 0x7747b71800000000, + 0xe65a088800000000, 0x706a0fff00000000, 0xca3b066600000000, + 0x5c0b011100000000, 0xff9e658f00000000, 0x69ae62f800000000, + 0xd3ff6b6100000000, 0x45cf6c1600000000, 0x78e20aa000000000, + 0xeed20dd700000000, 0x5483044e00000000, 0xc2b3033900000000, + 0x612667a700000000, 0xf71660d000000000, 0x4d47694900000000, + 0xdb776e3e00000000, 0x4a6ad1ae00000000, 0xdc5ad6d900000000, + 0x660bdf4000000000, 0xf03bd83700000000, 0x53aebca900000000, + 0xc59ebbde00000000, 0x7fcfb24700000000, 0xe9ffb53000000000, + 0x1cf2bdbd00000000, 0x8ac2baca00000000, 0x3093b35300000000, + 0xa6a3b42400000000, 0x0536d0ba00000000, 0x9306d7cd00000000, + 0x2957de5400000000, 0xbf67d92300000000, 0x2e7a66b300000000, + 0xb84a61c400000000, 0x021b685d00000000, 0x942b6f2a00000000, + 0x37be0bb400000000, 0xa18e0cc300000000, 0x1bdf055a00000000, + 0x8def022d00000000}, + {0x0000000000000000, 0x41311b1900000000, 0x8262363200000000, + 0xc3532d2b00000000, 0x04c56c6400000000, 0x45f4777d00000000, + 0x86a75a5600000000, 0xc796414f00000000, 0x088ad9c800000000, + 0x49bbc2d100000000, 0x8ae8effa00000000, 0xcbd9f4e300000000, + 0x0c4fb5ac00000000, 0x4d7eaeb500000000, 0x8e2d839e00000000, + 0xcf1c988700000000, 0x5112c24a00000000, 0x1023d95300000000, + 0xd370f47800000000, 0x9241ef6100000000, 0x55d7ae2e00000000, + 0x14e6b53700000000, 0xd7b5981c00000000, 0x9684830500000000, + 0x59981b8200000000, 0x18a9009b00000000, 0xdbfa2db000000000, + 0x9acb36a900000000, 0x5d5d77e600000000, 0x1c6c6cff00000000, + 0xdf3f41d400000000, 0x9e0e5acd00000000, 0xa224849500000000, + 0xe3159f8c00000000, 0x2046b2a700000000, 0x6177a9be00000000, + 0xa6e1e8f100000000, 0xe7d0f3e800000000, 0x2483dec300000000, + 0x65b2c5da00000000, 0xaaae5d5d00000000, 0xeb9f464400000000, + 0x28cc6b6f00000000, 0x69fd707600000000, 0xae6b313900000000, + 0xef5a2a2000000000, 0x2c09070b00000000, 0x6d381c1200000000, + 0xf33646df00000000, 0xb2075dc600000000, 0x715470ed00000000, + 0x30656bf400000000, 0xf7f32abb00000000, 0xb6c231a200000000, + 0x75911c8900000000, 0x34a0079000000000, 0xfbbc9f1700000000, + 0xba8d840e00000000, 0x79dea92500000000, 0x38efb23c00000000, + 0xff79f37300000000, 0xbe48e86a00000000, 0x7d1bc54100000000, + 0x3c2ade5800000000, 0x054f79f000000000, 0x447e62e900000000, + 0x872d4fc200000000, 0xc61c54db00000000, 0x018a159400000000, + 0x40bb0e8d00000000, 0x83e823a600000000, 0xc2d938bf00000000, + 0x0dc5a03800000000, 0x4cf4bb2100000000, 0x8fa7960a00000000, + 0xce968d1300000000, 0x0900cc5c00000000, 0x4831d74500000000, + 0x8b62fa6e00000000, 0xca53e17700000000, 
0x545dbbba00000000, + 0x156ca0a300000000, 0xd63f8d8800000000, 0x970e969100000000, + 0x5098d7de00000000, 0x11a9ccc700000000, 0xd2fae1ec00000000, + 0x93cbfaf500000000, 0x5cd7627200000000, 0x1de6796b00000000, + 0xdeb5544000000000, 0x9f844f5900000000, 0x58120e1600000000, + 0x1923150f00000000, 0xda70382400000000, 0x9b41233d00000000, + 0xa76bfd6500000000, 0xe65ae67c00000000, 0x2509cb5700000000, + 0x6438d04e00000000, 0xa3ae910100000000, 0xe29f8a1800000000, + 0x21cca73300000000, 0x60fdbc2a00000000, 0xafe124ad00000000, + 0xeed03fb400000000, 0x2d83129f00000000, 0x6cb2098600000000, + 0xab2448c900000000, 0xea1553d000000000, 0x29467efb00000000, + 0x687765e200000000, 0xf6793f2f00000000, 0xb748243600000000, + 0x741b091d00000000, 0x352a120400000000, 0xf2bc534b00000000, + 0xb38d485200000000, 0x70de657900000000, 0x31ef7e6000000000, + 0xfef3e6e700000000, 0xbfc2fdfe00000000, 0x7c91d0d500000000, + 0x3da0cbcc00000000, 0xfa368a8300000000, 0xbb07919a00000000, + 0x7854bcb100000000, 0x3965a7a800000000, 0x4b98833b00000000, + 0x0aa9982200000000, 0xc9fab50900000000, 0x88cbae1000000000, + 0x4f5def5f00000000, 0x0e6cf44600000000, 0xcd3fd96d00000000, + 0x8c0ec27400000000, 0x43125af300000000, 0x022341ea00000000, + 0xc1706cc100000000, 0x804177d800000000, 0x47d7369700000000, + 0x06e62d8e00000000, 0xc5b500a500000000, 0x84841bbc00000000, + 0x1a8a417100000000, 0x5bbb5a6800000000, 0x98e8774300000000, + 0xd9d96c5a00000000, 0x1e4f2d1500000000, 0x5f7e360c00000000, + 0x9c2d1b2700000000, 0xdd1c003e00000000, 0x120098b900000000, + 0x533183a000000000, 0x9062ae8b00000000, 0xd153b59200000000, + 0x16c5f4dd00000000, 0x57f4efc400000000, 0x94a7c2ef00000000, + 0xd596d9f600000000, 0xe9bc07ae00000000, 0xa88d1cb700000000, + 0x6bde319c00000000, 0x2aef2a8500000000, 0xed796bca00000000, + 0xac4870d300000000, 0x6f1b5df800000000, 0x2e2a46e100000000, + 0xe136de6600000000, 0xa007c57f00000000, 0x6354e85400000000, + 0x2265f34d00000000, 0xe5f3b20200000000, 0xa4c2a91b00000000, + 0x6791843000000000, 0x26a09f2900000000, 0xb8aec5e400000000, + 0xf99fdefd00000000, 0x3accf3d600000000, 0x7bfde8cf00000000, + 0xbc6ba98000000000, 0xfd5ab29900000000, 0x3e099fb200000000, + 0x7f3884ab00000000, 0xb0241c2c00000000, 0xf115073500000000, + 0x32462a1e00000000, 0x7377310700000000, 0xb4e1704800000000, + 0xf5d06b5100000000, 0x3683467a00000000, 0x77b25d6300000000, + 0x4ed7facb00000000, 0x0fe6e1d200000000, 0xccb5ccf900000000, + 0x8d84d7e000000000, 0x4a1296af00000000, 0x0b238db600000000, + 0xc870a09d00000000, 0x8941bb8400000000, 0x465d230300000000, + 0x076c381a00000000, 0xc43f153100000000, 0x850e0e2800000000, + 0x42984f6700000000, 0x03a9547e00000000, 0xc0fa795500000000, + 0x81cb624c00000000, 0x1fc5388100000000, 0x5ef4239800000000, + 0x9da70eb300000000, 0xdc9615aa00000000, 0x1b0054e500000000, + 0x5a314ffc00000000, 0x996262d700000000, 0xd85379ce00000000, + 0x174fe14900000000, 0x567efa5000000000, 0x952dd77b00000000, + 0xd41ccc6200000000, 0x138a8d2d00000000, 0x52bb963400000000, + 0x91e8bb1f00000000, 0xd0d9a00600000000, 0xecf37e5e00000000, + 0xadc2654700000000, 0x6e91486c00000000, 0x2fa0537500000000, + 0xe836123a00000000, 0xa907092300000000, 0x6a54240800000000, + 0x2b653f1100000000, 0xe479a79600000000, 0xa548bc8f00000000, + 0x661b91a400000000, 0x272a8abd00000000, 0xe0bccbf200000000, + 0xa18dd0eb00000000, 0x62defdc000000000, 0x23efe6d900000000, + 0xbde1bc1400000000, 0xfcd0a70d00000000, 0x3f838a2600000000, + 0x7eb2913f00000000, 0xb924d07000000000, 0xf815cb6900000000, + 0x3b46e64200000000, 0x7a77fd5b00000000, 0xb56b65dc00000000, + 0xf45a7ec500000000, 0x370953ee00000000, 0x763848f700000000, 
+ 0xb1ae09b800000000, 0xf09f12a100000000, 0x33cc3f8a00000000, + 0x72fd249300000000}, + {0x0000000000000000, 0x376ac20100000000, 0x6ed4840300000000, + 0x59be460200000000, 0xdca8090700000000, 0xebc2cb0600000000, + 0xb27c8d0400000000, 0x85164f0500000000, 0xb851130e00000000, + 0x8f3bd10f00000000, 0xd685970d00000000, 0xe1ef550c00000000, + 0x64f91a0900000000, 0x5393d80800000000, 0x0a2d9e0a00000000, + 0x3d475c0b00000000, 0x70a3261c00000000, 0x47c9e41d00000000, + 0x1e77a21f00000000, 0x291d601e00000000, 0xac0b2f1b00000000, + 0x9b61ed1a00000000, 0xc2dfab1800000000, 0xf5b5691900000000, + 0xc8f2351200000000, 0xff98f71300000000, 0xa626b11100000000, + 0x914c731000000000, 0x145a3c1500000000, 0x2330fe1400000000, + 0x7a8eb81600000000, 0x4de47a1700000000, 0xe0464d3800000000, + 0xd72c8f3900000000, 0x8e92c93b00000000, 0xb9f80b3a00000000, + 0x3cee443f00000000, 0x0b84863e00000000, 0x523ac03c00000000, + 0x6550023d00000000, 0x58175e3600000000, 0x6f7d9c3700000000, + 0x36c3da3500000000, 0x01a9183400000000, 0x84bf573100000000, + 0xb3d5953000000000, 0xea6bd33200000000, 0xdd01113300000000, + 0x90e56b2400000000, 0xa78fa92500000000, 0xfe31ef2700000000, + 0xc95b2d2600000000, 0x4c4d622300000000, 0x7b27a02200000000, + 0x2299e62000000000, 0x15f3242100000000, 0x28b4782a00000000, + 0x1fdeba2b00000000, 0x4660fc2900000000, 0x710a3e2800000000, + 0xf41c712d00000000, 0xc376b32c00000000, 0x9ac8f52e00000000, + 0xada2372f00000000, 0xc08d9a7000000000, 0xf7e7587100000000, + 0xae591e7300000000, 0x9933dc7200000000, 0x1c25937700000000, + 0x2b4f517600000000, 0x72f1177400000000, 0x459bd57500000000, + 0x78dc897e00000000, 0x4fb64b7f00000000, 0x16080d7d00000000, + 0x2162cf7c00000000, 0xa474807900000000, 0x931e427800000000, + 0xcaa0047a00000000, 0xfdcac67b00000000, 0xb02ebc6c00000000, + 0x87447e6d00000000, 0xdefa386f00000000, 0xe990fa6e00000000, + 0x6c86b56b00000000, 0x5bec776a00000000, 0x0252316800000000, + 0x3538f36900000000, 0x087faf6200000000, 0x3f156d6300000000, + 0x66ab2b6100000000, 0x51c1e96000000000, 0xd4d7a66500000000, + 0xe3bd646400000000, 0xba03226600000000, 0x8d69e06700000000, + 0x20cbd74800000000, 0x17a1154900000000, 0x4e1f534b00000000, + 0x7975914a00000000, 0xfc63de4f00000000, 0xcb091c4e00000000, + 0x92b75a4c00000000, 0xa5dd984d00000000, 0x989ac44600000000, + 0xaff0064700000000, 0xf64e404500000000, 0xc124824400000000, + 0x4432cd4100000000, 0x73580f4000000000, 0x2ae6494200000000, + 0x1d8c8b4300000000, 0x5068f15400000000, 0x6702335500000000, + 0x3ebc755700000000, 0x09d6b75600000000, 0x8cc0f85300000000, + 0xbbaa3a5200000000, 0xe2147c5000000000, 0xd57ebe5100000000, + 0xe839e25a00000000, 0xdf53205b00000000, 0x86ed665900000000, + 0xb187a45800000000, 0x3491eb5d00000000, 0x03fb295c00000000, + 0x5a456f5e00000000, 0x6d2fad5f00000000, 0x801b35e100000000, + 0xb771f7e000000000, 0xeecfb1e200000000, 0xd9a573e300000000, + 0x5cb33ce600000000, 0x6bd9fee700000000, 0x3267b8e500000000, + 0x050d7ae400000000, 0x384a26ef00000000, 0x0f20e4ee00000000, + 0x569ea2ec00000000, 0x61f460ed00000000, 0xe4e22fe800000000, + 0xd388ede900000000, 0x8a36abeb00000000, 0xbd5c69ea00000000, + 0xf0b813fd00000000, 0xc7d2d1fc00000000, 0x9e6c97fe00000000, + 0xa90655ff00000000, 0x2c101afa00000000, 0x1b7ad8fb00000000, + 0x42c49ef900000000, 0x75ae5cf800000000, 0x48e900f300000000, + 0x7f83c2f200000000, 0x263d84f000000000, 0x115746f100000000, + 0x944109f400000000, 0xa32bcbf500000000, 0xfa958df700000000, + 0xcdff4ff600000000, 0x605d78d900000000, 0x5737bad800000000, + 0x0e89fcda00000000, 0x39e33edb00000000, 0xbcf571de00000000, + 0x8b9fb3df00000000, 0xd221f5dd00000000, 
0xe54b37dc00000000, + 0xd80c6bd700000000, 0xef66a9d600000000, 0xb6d8efd400000000, + 0x81b22dd500000000, 0x04a462d000000000, 0x33cea0d100000000, + 0x6a70e6d300000000, 0x5d1a24d200000000, 0x10fe5ec500000000, + 0x27949cc400000000, 0x7e2adac600000000, 0x494018c700000000, + 0xcc5657c200000000, 0xfb3c95c300000000, 0xa282d3c100000000, + 0x95e811c000000000, 0xa8af4dcb00000000, 0x9fc58fca00000000, + 0xc67bc9c800000000, 0xf1110bc900000000, 0x740744cc00000000, + 0x436d86cd00000000, 0x1ad3c0cf00000000, 0x2db902ce00000000, + 0x4096af9100000000, 0x77fc6d9000000000, 0x2e422b9200000000, + 0x1928e99300000000, 0x9c3ea69600000000, 0xab54649700000000, + 0xf2ea229500000000, 0xc580e09400000000, 0xf8c7bc9f00000000, + 0xcfad7e9e00000000, 0x9613389c00000000, 0xa179fa9d00000000, + 0x246fb59800000000, 0x1305779900000000, 0x4abb319b00000000, + 0x7dd1f39a00000000, 0x3035898d00000000, 0x075f4b8c00000000, + 0x5ee10d8e00000000, 0x698bcf8f00000000, 0xec9d808a00000000, + 0xdbf7428b00000000, 0x8249048900000000, 0xb523c68800000000, + 0x88649a8300000000, 0xbf0e588200000000, 0xe6b01e8000000000, + 0xd1dadc8100000000, 0x54cc938400000000, 0x63a6518500000000, + 0x3a18178700000000, 0x0d72d58600000000, 0xa0d0e2a900000000, + 0x97ba20a800000000, 0xce0466aa00000000, 0xf96ea4ab00000000, + 0x7c78ebae00000000, 0x4b1229af00000000, 0x12ac6fad00000000, + 0x25c6adac00000000, 0x1881f1a700000000, 0x2feb33a600000000, + 0x765575a400000000, 0x413fb7a500000000, 0xc429f8a000000000, + 0xf3433aa100000000, 0xaafd7ca300000000, 0x9d97bea200000000, + 0xd073c4b500000000, 0xe71906b400000000, 0xbea740b600000000, + 0x89cd82b700000000, 0x0cdbcdb200000000, 0x3bb10fb300000000, + 0x620f49b100000000, 0x55658bb000000000, 0x6822d7bb00000000, + 0x5f4815ba00000000, 0x06f653b800000000, 0x319c91b900000000, + 0xb48adebc00000000, 0x83e01cbd00000000, 0xda5e5abf00000000, + 0xed3498be00000000}, + {0x0000000000000000, 0x6567bcb800000000, 0x8bc809aa00000000, + 0xeeafb51200000000, 0x5797628f00000000, 0x32f0de3700000000, + 0xdc5f6b2500000000, 0xb938d79d00000000, 0xef28b4c500000000, + 0x8a4f087d00000000, 0x64e0bd6f00000000, 0x018701d700000000, + 0xb8bfd64a00000000, 0xddd86af200000000, 0x3377dfe000000000, + 0x5610635800000000, 0x9f57195000000000, 0xfa30a5e800000000, + 0x149f10fa00000000, 0x71f8ac4200000000, 0xc8c07bdf00000000, + 0xada7c76700000000, 0x4308727500000000, 0x266fcecd00000000, + 0x707fad9500000000, 0x1518112d00000000, 0xfbb7a43f00000000, + 0x9ed0188700000000, 0x27e8cf1a00000000, 0x428f73a200000000, + 0xac20c6b000000000, 0xc9477a0800000000, 0x3eaf32a000000000, + 0x5bc88e1800000000, 0xb5673b0a00000000, 0xd00087b200000000, + 0x6938502f00000000, 0x0c5fec9700000000, 0xe2f0598500000000, + 0x8797e53d00000000, 0xd187866500000000, 0xb4e03add00000000, + 0x5a4f8fcf00000000, 0x3f28337700000000, 0x8610e4ea00000000, + 0xe377585200000000, 0x0dd8ed4000000000, 0x68bf51f800000000, + 0xa1f82bf000000000, 0xc49f974800000000, 0x2a30225a00000000, + 0x4f579ee200000000, 0xf66f497f00000000, 0x9308f5c700000000, + 0x7da740d500000000, 0x18c0fc6d00000000, 0x4ed09f3500000000, + 0x2bb7238d00000000, 0xc518969f00000000, 0xa07f2a2700000000, + 0x1947fdba00000000, 0x7c20410200000000, 0x928ff41000000000, + 0xf7e848a800000000, 0x3d58149b00000000, 0x583fa82300000000, + 0xb6901d3100000000, 0xd3f7a18900000000, 0x6acf761400000000, + 0x0fa8caac00000000, 0xe1077fbe00000000, 0x8460c30600000000, + 0xd270a05e00000000, 0xb7171ce600000000, 0x59b8a9f400000000, + 0x3cdf154c00000000, 0x85e7c2d100000000, 0xe0807e6900000000, + 0x0e2fcb7b00000000, 0x6b4877c300000000, 0xa20f0dcb00000000, + 0xc768b17300000000, 
0x29c7046100000000, 0x4ca0b8d900000000, + 0xf5986f4400000000, 0x90ffd3fc00000000, 0x7e5066ee00000000, + 0x1b37da5600000000, 0x4d27b90e00000000, 0x284005b600000000, + 0xc6efb0a400000000, 0xa3880c1c00000000, 0x1ab0db8100000000, + 0x7fd7673900000000, 0x9178d22b00000000, 0xf41f6e9300000000, + 0x03f7263b00000000, 0x66909a8300000000, 0x883f2f9100000000, + 0xed58932900000000, 0x546044b400000000, 0x3107f80c00000000, + 0xdfa84d1e00000000, 0xbacff1a600000000, 0xecdf92fe00000000, + 0x89b82e4600000000, 0x67179b5400000000, 0x027027ec00000000, + 0xbb48f07100000000, 0xde2f4cc900000000, 0x3080f9db00000000, + 0x55e7456300000000, 0x9ca03f6b00000000, 0xf9c783d300000000, + 0x176836c100000000, 0x720f8a7900000000, 0xcb375de400000000, + 0xae50e15c00000000, 0x40ff544e00000000, 0x2598e8f600000000, + 0x73888bae00000000, 0x16ef371600000000, 0xf840820400000000, + 0x9d273ebc00000000, 0x241fe92100000000, 0x4178559900000000, + 0xafd7e08b00000000, 0xcab05c3300000000, 0x3bb659ed00000000, + 0x5ed1e55500000000, 0xb07e504700000000, 0xd519ecff00000000, + 0x6c213b6200000000, 0x094687da00000000, 0xe7e932c800000000, + 0x828e8e7000000000, 0xd49eed2800000000, 0xb1f9519000000000, + 0x5f56e48200000000, 0x3a31583a00000000, 0x83098fa700000000, + 0xe66e331f00000000, 0x08c1860d00000000, 0x6da63ab500000000, + 0xa4e140bd00000000, 0xc186fc0500000000, 0x2f29491700000000, + 0x4a4ef5af00000000, 0xf376223200000000, 0x96119e8a00000000, + 0x78be2b9800000000, 0x1dd9972000000000, 0x4bc9f47800000000, + 0x2eae48c000000000, 0xc001fdd200000000, 0xa566416a00000000, + 0x1c5e96f700000000, 0x79392a4f00000000, 0x97969f5d00000000, + 0xf2f123e500000000, 0x05196b4d00000000, 0x607ed7f500000000, + 0x8ed162e700000000, 0xebb6de5f00000000, 0x528e09c200000000, + 0x37e9b57a00000000, 0xd946006800000000, 0xbc21bcd000000000, + 0xea31df8800000000, 0x8f56633000000000, 0x61f9d62200000000, + 0x049e6a9a00000000, 0xbda6bd0700000000, 0xd8c101bf00000000, + 0x366eb4ad00000000, 0x5309081500000000, 0x9a4e721d00000000, + 0xff29cea500000000, 0x11867bb700000000, 0x74e1c70f00000000, + 0xcdd9109200000000, 0xa8beac2a00000000, 0x4611193800000000, + 0x2376a58000000000, 0x7566c6d800000000, 0x10017a6000000000, + 0xfeaecf7200000000, 0x9bc973ca00000000, 0x22f1a45700000000, + 0x479618ef00000000, 0xa939adfd00000000, 0xcc5e114500000000, + 0x06ee4d7600000000, 0x6389f1ce00000000, 0x8d2644dc00000000, + 0xe841f86400000000, 0x51792ff900000000, 0x341e934100000000, + 0xdab1265300000000, 0xbfd69aeb00000000, 0xe9c6f9b300000000, + 0x8ca1450b00000000, 0x620ef01900000000, 0x07694ca100000000, + 0xbe519b3c00000000, 0xdb36278400000000, 0x3599929600000000, + 0x50fe2e2e00000000, 0x99b9542600000000, 0xfcdee89e00000000, + 0x12715d8c00000000, 0x7716e13400000000, 0xce2e36a900000000, + 0xab498a1100000000, 0x45e63f0300000000, 0x208183bb00000000, + 0x7691e0e300000000, 0x13f65c5b00000000, 0xfd59e94900000000, + 0x983e55f100000000, 0x2106826c00000000, 0x44613ed400000000, + 0xaace8bc600000000, 0xcfa9377e00000000, 0x38417fd600000000, + 0x5d26c36e00000000, 0xb389767c00000000, 0xd6eecac400000000, + 0x6fd61d5900000000, 0x0ab1a1e100000000, 0xe41e14f300000000, + 0x8179a84b00000000, 0xd769cb1300000000, 0xb20e77ab00000000, + 0x5ca1c2b900000000, 0x39c67e0100000000, 0x80fea99c00000000, + 0xe599152400000000, 0x0b36a03600000000, 0x6e511c8e00000000, + 0xa716668600000000, 0xc271da3e00000000, 0x2cde6f2c00000000, + 0x49b9d39400000000, 0xf081040900000000, 0x95e6b8b100000000, + 0x7b490da300000000, 0x1e2eb11b00000000, 0x483ed24300000000, + 0x2d596efb00000000, 0xc3f6dbe900000000, 0xa691675100000000, + 0x1fa9b0cc00000000, 0x7ace0c7400000000, 
0x9461b96600000000, + 0xf10605de00000000}, + {0x0000000000000000, 0xb029603d00000000, 0x6053c07a00000000, + 0xd07aa04700000000, 0xc0a680f500000000, 0x708fe0c800000000, + 0xa0f5408f00000000, 0x10dc20b200000000, 0xc14b703000000000, + 0x7162100d00000000, 0xa118b04a00000000, 0x1131d07700000000, + 0x01edf0c500000000, 0xb1c490f800000000, 0x61be30bf00000000, + 0xd197508200000000, 0x8297e06000000000, 0x32be805d00000000, + 0xe2c4201a00000000, 0x52ed402700000000, 0x4231609500000000, + 0xf21800a800000000, 0x2262a0ef00000000, 0x924bc0d200000000, + 0x43dc905000000000, 0xf3f5f06d00000000, 0x238f502a00000000, + 0x93a6301700000000, 0x837a10a500000000, 0x3353709800000000, + 0xe329d0df00000000, 0x5300b0e200000000, 0x042fc1c100000000, + 0xb406a1fc00000000, 0x647c01bb00000000, 0xd455618600000000, + 0xc489413400000000, 0x74a0210900000000, 0xa4da814e00000000, + 0x14f3e17300000000, 0xc564b1f100000000, 0x754dd1cc00000000, + 0xa537718b00000000, 0x151e11b600000000, 0x05c2310400000000, + 0xb5eb513900000000, 0x6591f17e00000000, 0xd5b8914300000000, + 0x86b821a100000000, 0x3691419c00000000, 0xe6ebe1db00000000, + 0x56c281e600000000, 0x461ea15400000000, 0xf637c16900000000, + 0x264d612e00000000, 0x9664011300000000, 0x47f3519100000000, + 0xf7da31ac00000000, 0x27a091eb00000000, 0x9789f1d600000000, + 0x8755d16400000000, 0x377cb15900000000, 0xe706111e00000000, + 0x572f712300000000, 0x4958f35800000000, 0xf971936500000000, + 0x290b332200000000, 0x9922531f00000000, 0x89fe73ad00000000, + 0x39d7139000000000, 0xe9adb3d700000000, 0x5984d3ea00000000, + 0x8813836800000000, 0x383ae35500000000, 0xe840431200000000, + 0x5869232f00000000, 0x48b5039d00000000, 0xf89c63a000000000, + 0x28e6c3e700000000, 0x98cfa3da00000000, 0xcbcf133800000000, + 0x7be6730500000000, 0xab9cd34200000000, 0x1bb5b37f00000000, + 0x0b6993cd00000000, 0xbb40f3f000000000, 0x6b3a53b700000000, + 0xdb13338a00000000, 0x0a84630800000000, 0xbaad033500000000, + 0x6ad7a37200000000, 0xdafec34f00000000, 0xca22e3fd00000000, + 0x7a0b83c000000000, 0xaa71238700000000, 0x1a5843ba00000000, + 0x4d77329900000000, 0xfd5e52a400000000, 0x2d24f2e300000000, + 0x9d0d92de00000000, 0x8dd1b26c00000000, 0x3df8d25100000000, + 0xed82721600000000, 0x5dab122b00000000, 0x8c3c42a900000000, + 0x3c15229400000000, 0xec6f82d300000000, 0x5c46e2ee00000000, + 0x4c9ac25c00000000, 0xfcb3a26100000000, 0x2cc9022600000000, + 0x9ce0621b00000000, 0xcfe0d2f900000000, 0x7fc9b2c400000000, + 0xafb3128300000000, 0x1f9a72be00000000, 0x0f46520c00000000, + 0xbf6f323100000000, 0x6f15927600000000, 0xdf3cf24b00000000, + 0x0eaba2c900000000, 0xbe82c2f400000000, 0x6ef862b300000000, + 0xded1028e00000000, 0xce0d223c00000000, 0x7e24420100000000, + 0xae5ee24600000000, 0x1e77827b00000000, 0x92b0e6b100000000, + 0x2299868c00000000, 0xf2e326cb00000000, 0x42ca46f600000000, + 0x5216664400000000, 0xe23f067900000000, 0x3245a63e00000000, + 0x826cc60300000000, 0x53fb968100000000, 0xe3d2f6bc00000000, + 0x33a856fb00000000, 0x838136c600000000, 0x935d167400000000, + 0x2374764900000000, 0xf30ed60e00000000, 0x4327b63300000000, + 0x102706d100000000, 0xa00e66ec00000000, 0x7074c6ab00000000, + 0xc05da69600000000, 0xd081862400000000, 0x60a8e61900000000, + 0xb0d2465e00000000, 0x00fb266300000000, 0xd16c76e100000000, + 0x614516dc00000000, 0xb13fb69b00000000, 0x0116d6a600000000, + 0x11caf61400000000, 0xa1e3962900000000, 0x7199366e00000000, + 0xc1b0565300000000, 0x969f277000000000, 0x26b6474d00000000, + 0xf6cce70a00000000, 0x46e5873700000000, 0x5639a78500000000, + 0xe610c7b800000000, 0x366a67ff00000000, 0x864307c200000000, + 0x57d4574000000000, 
0xe7fd377d00000000, 0x3787973a00000000, + 0x87aef70700000000, 0x9772d7b500000000, 0x275bb78800000000, + 0xf72117cf00000000, 0x470877f200000000, 0x1408c71000000000, + 0xa421a72d00000000, 0x745b076a00000000, 0xc472675700000000, + 0xd4ae47e500000000, 0x648727d800000000, 0xb4fd879f00000000, + 0x04d4e7a200000000, 0xd543b72000000000, 0x656ad71d00000000, + 0xb510775a00000000, 0x0539176700000000, 0x15e537d500000000, + 0xa5cc57e800000000, 0x75b6f7af00000000, 0xc59f979200000000, + 0xdbe815e900000000, 0x6bc175d400000000, 0xbbbbd59300000000, + 0x0b92b5ae00000000, 0x1b4e951c00000000, 0xab67f52100000000, + 0x7b1d556600000000, 0xcb34355b00000000, 0x1aa365d900000000, + 0xaa8a05e400000000, 0x7af0a5a300000000, 0xcad9c59e00000000, + 0xda05e52c00000000, 0x6a2c851100000000, 0xba56255600000000, + 0x0a7f456b00000000, 0x597ff58900000000, 0xe95695b400000000, + 0x392c35f300000000, 0x890555ce00000000, 0x99d9757c00000000, + 0x29f0154100000000, 0xf98ab50600000000, 0x49a3d53b00000000, + 0x983485b900000000, 0x281de58400000000, 0xf86745c300000000, + 0x484e25fe00000000, 0x5892054c00000000, 0xe8bb657100000000, + 0x38c1c53600000000, 0x88e8a50b00000000, 0xdfc7d42800000000, + 0x6feeb41500000000, 0xbf94145200000000, 0x0fbd746f00000000, + 0x1f6154dd00000000, 0xaf4834e000000000, 0x7f3294a700000000, + 0xcf1bf49a00000000, 0x1e8ca41800000000, 0xaea5c42500000000, + 0x7edf646200000000, 0xcef6045f00000000, 0xde2a24ed00000000, + 0x6e0344d000000000, 0xbe79e49700000000, 0x0e5084aa00000000, + 0x5d50344800000000, 0xed79547500000000, 0x3d03f43200000000, + 0x8d2a940f00000000, 0x9df6b4bd00000000, 0x2ddfd48000000000, + 0xfda574c700000000, 0x4d8c14fa00000000, 0x9c1b447800000000, + 0x2c32244500000000, 0xfc48840200000000, 0x4c61e43f00000000, + 0x5cbdc48d00000000, 0xec94a4b000000000, 0x3cee04f700000000, + 0x8cc764ca00000000}, + {0x0000000000000000, 0xa5d35ccb00000000, 0x0ba1c84d00000000, + 0xae72948600000000, 0x1642919b00000000, 0xb391cd5000000000, + 0x1de359d600000000, 0xb830051d00000000, 0x6d8253ec00000000, + 0xc8510f2700000000, 0x66239ba100000000, 0xc3f0c76a00000000, + 0x7bc0c27700000000, 0xde139ebc00000000, 0x70610a3a00000000, + 0xd5b256f100000000, 0x9b02d60300000000, 0x3ed18ac800000000, + 0x90a31e4e00000000, 0x3570428500000000, 0x8d40479800000000, + 0x28931b5300000000, 0x86e18fd500000000, 0x2332d31e00000000, + 0xf68085ef00000000, 0x5353d92400000000, 0xfd214da200000000, + 0x58f2116900000000, 0xe0c2147400000000, 0x451148bf00000000, + 0xeb63dc3900000000, 0x4eb080f200000000, 0x3605ac0700000000, + 0x93d6f0cc00000000, 0x3da4644a00000000, 0x9877388100000000, + 0x20473d9c00000000, 0x8594615700000000, 0x2be6f5d100000000, + 0x8e35a91a00000000, 0x5b87ffeb00000000, 0xfe54a32000000000, + 0x502637a600000000, 0xf5f56b6d00000000, 0x4dc56e7000000000, + 0xe81632bb00000000, 0x4664a63d00000000, 0xe3b7faf600000000, + 0xad077a0400000000, 0x08d426cf00000000, 0xa6a6b24900000000, + 0x0375ee8200000000, 0xbb45eb9f00000000, 0x1e96b75400000000, + 0xb0e423d200000000, 0x15377f1900000000, 0xc08529e800000000, + 0x6556752300000000, 0xcb24e1a500000000, 0x6ef7bd6e00000000, + 0xd6c7b87300000000, 0x7314e4b800000000, 0xdd66703e00000000, + 0x78b52cf500000000, 0x6c0a580f00000000, 0xc9d904c400000000, + 0x67ab904200000000, 0xc278cc8900000000, 0x7a48c99400000000, + 0xdf9b955f00000000, 0x71e901d900000000, 0xd43a5d1200000000, + 0x01880be300000000, 0xa45b572800000000, 0x0a29c3ae00000000, + 0xaffa9f6500000000, 0x17ca9a7800000000, 0xb219c6b300000000, + 0x1c6b523500000000, 0xb9b80efe00000000, 0xf7088e0c00000000, + 0x52dbd2c700000000, 0xfca9464100000000, 0x597a1a8a00000000, + 
0xe14a1f9700000000, 0x4499435c00000000, 0xeaebd7da00000000, + 0x4f388b1100000000, 0x9a8adde000000000, 0x3f59812b00000000, + 0x912b15ad00000000, 0x34f8496600000000, 0x8cc84c7b00000000, + 0x291b10b000000000, 0x8769843600000000, 0x22bad8fd00000000, + 0x5a0ff40800000000, 0xffdca8c300000000, 0x51ae3c4500000000, + 0xf47d608e00000000, 0x4c4d659300000000, 0xe99e395800000000, + 0x47ecadde00000000, 0xe23ff11500000000, 0x378da7e400000000, + 0x925efb2f00000000, 0x3c2c6fa900000000, 0x99ff336200000000, + 0x21cf367f00000000, 0x841c6ab400000000, 0x2a6efe3200000000, + 0x8fbda2f900000000, 0xc10d220b00000000, 0x64de7ec000000000, + 0xcaacea4600000000, 0x6f7fb68d00000000, 0xd74fb39000000000, + 0x729cef5b00000000, 0xdcee7bdd00000000, 0x793d271600000000, + 0xac8f71e700000000, 0x095c2d2c00000000, 0xa72eb9aa00000000, + 0x02fde56100000000, 0xbacde07c00000000, 0x1f1ebcb700000000, + 0xb16c283100000000, 0x14bf74fa00000000, 0xd814b01e00000000, + 0x7dc7ecd500000000, 0xd3b5785300000000, 0x7666249800000000, + 0xce56218500000000, 0x6b857d4e00000000, 0xc5f7e9c800000000, + 0x6024b50300000000, 0xb596e3f200000000, 0x1045bf3900000000, + 0xbe372bbf00000000, 0x1be4777400000000, 0xa3d4726900000000, + 0x06072ea200000000, 0xa875ba2400000000, 0x0da6e6ef00000000, + 0x4316661d00000000, 0xe6c53ad600000000, 0x48b7ae5000000000, + 0xed64f29b00000000, 0x5554f78600000000, 0xf087ab4d00000000, + 0x5ef53fcb00000000, 0xfb26630000000000, 0x2e9435f100000000, + 0x8b47693a00000000, 0x2535fdbc00000000, 0x80e6a17700000000, + 0x38d6a46a00000000, 0x9d05f8a100000000, 0x33776c2700000000, + 0x96a430ec00000000, 0xee111c1900000000, 0x4bc240d200000000, + 0xe5b0d45400000000, 0x4063889f00000000, 0xf8538d8200000000, + 0x5d80d14900000000, 0xf3f245cf00000000, 0x5621190400000000, + 0x83934ff500000000, 0x2640133e00000000, 0x883287b800000000, + 0x2de1db7300000000, 0x95d1de6e00000000, 0x300282a500000000, + 0x9e70162300000000, 0x3ba34ae800000000, 0x7513ca1a00000000, + 0xd0c096d100000000, 0x7eb2025700000000, 0xdb615e9c00000000, + 0x63515b8100000000, 0xc682074a00000000, 0x68f093cc00000000, + 0xcd23cf0700000000, 0x189199f600000000, 0xbd42c53d00000000, + 0x133051bb00000000, 0xb6e30d7000000000, 0x0ed3086d00000000, + 0xab0054a600000000, 0x0572c02000000000, 0xa0a19ceb00000000, + 0xb41ee81100000000, 0x11cdb4da00000000, 0xbfbf205c00000000, + 0x1a6c7c9700000000, 0xa25c798a00000000, 0x078f254100000000, + 0xa9fdb1c700000000, 0x0c2eed0c00000000, 0xd99cbbfd00000000, + 0x7c4fe73600000000, 0xd23d73b000000000, 0x77ee2f7b00000000, + 0xcfde2a6600000000, 0x6a0d76ad00000000, 0xc47fe22b00000000, + 0x61acbee000000000, 0x2f1c3e1200000000, 0x8acf62d900000000, + 0x24bdf65f00000000, 0x816eaa9400000000, 0x395eaf8900000000, + 0x9c8df34200000000, 0x32ff67c400000000, 0x972c3b0f00000000, + 0x429e6dfe00000000, 0xe74d313500000000, 0x493fa5b300000000, + 0xececf97800000000, 0x54dcfc6500000000, 0xf10fa0ae00000000, + 0x5f7d342800000000, 0xfaae68e300000000, 0x821b441600000000, + 0x27c818dd00000000, 0x89ba8c5b00000000, 0x2c69d09000000000, + 0x9459d58d00000000, 0x318a894600000000, 0x9ff81dc000000000, + 0x3a2b410b00000000, 0xef9917fa00000000, 0x4a4a4b3100000000, + 0xe438dfb700000000, 0x41eb837c00000000, 0xf9db866100000000, + 0x5c08daaa00000000, 0xf27a4e2c00000000, 0x57a912e700000000, + 0x1919921500000000, 0xbccacede00000000, 0x12b85a5800000000, + 0xb76b069300000000, 0x0f5b038e00000000, 0xaa885f4500000000, + 0x04facbc300000000, 0xa129970800000000, 0x749bc1f900000000, + 0xd1489d3200000000, 0x7f3a09b400000000, 0xdae9557f00000000, + 0x62d9506200000000, 0xc70a0ca900000000, 0x6978982f00000000, + 0xccabc4e400000000}, 
+ {0x0000000000000000, 0xb40b77a600000000, 0x29119f9700000000, + 0x9d1ae83100000000, 0x13244ff400000000, 0xa72f385200000000, + 0x3a35d06300000000, 0x8e3ea7c500000000, 0x674eef3300000000, + 0xd345989500000000, 0x4e5f70a400000000, 0xfa54070200000000, + 0x746aa0c700000000, 0xc061d76100000000, 0x5d7b3f5000000000, + 0xe97048f600000000, 0xce9cde6700000000, 0x7a97a9c100000000, + 0xe78d41f000000000, 0x5386365600000000, 0xddb8919300000000, + 0x69b3e63500000000, 0xf4a90e0400000000, 0x40a279a200000000, + 0xa9d2315400000000, 0x1dd946f200000000, 0x80c3aec300000000, + 0x34c8d96500000000, 0xbaf67ea000000000, 0x0efd090600000000, + 0x93e7e13700000000, 0x27ec969100000000, 0x9c39bdcf00000000, + 0x2832ca6900000000, 0xb528225800000000, 0x012355fe00000000, + 0x8f1df23b00000000, 0x3b16859d00000000, 0xa60c6dac00000000, + 0x12071a0a00000000, 0xfb7752fc00000000, 0x4f7c255a00000000, + 0xd266cd6b00000000, 0x666dbacd00000000, 0xe8531d0800000000, + 0x5c586aae00000000, 0xc142829f00000000, 0x7549f53900000000, + 0x52a563a800000000, 0xe6ae140e00000000, 0x7bb4fc3f00000000, + 0xcfbf8b9900000000, 0x41812c5c00000000, 0xf58a5bfa00000000, + 0x6890b3cb00000000, 0xdc9bc46d00000000, 0x35eb8c9b00000000, + 0x81e0fb3d00000000, 0x1cfa130c00000000, 0xa8f164aa00000000, + 0x26cfc36f00000000, 0x92c4b4c900000000, 0x0fde5cf800000000, + 0xbbd52b5e00000000, 0x79750b4400000000, 0xcd7e7ce200000000, + 0x506494d300000000, 0xe46fe37500000000, 0x6a5144b000000000, + 0xde5a331600000000, 0x4340db2700000000, 0xf74bac8100000000, + 0x1e3be47700000000, 0xaa3093d100000000, 0x372a7be000000000, + 0x83210c4600000000, 0x0d1fab8300000000, 0xb914dc2500000000, + 0x240e341400000000, 0x900543b200000000, 0xb7e9d52300000000, + 0x03e2a28500000000, 0x9ef84ab400000000, 0x2af33d1200000000, + 0xa4cd9ad700000000, 0x10c6ed7100000000, 0x8ddc054000000000, + 0x39d772e600000000, 0xd0a73a1000000000, 0x64ac4db600000000, + 0xf9b6a58700000000, 0x4dbdd22100000000, 0xc38375e400000000, + 0x7788024200000000, 0xea92ea7300000000, 0x5e999dd500000000, + 0xe54cb68b00000000, 0x5147c12d00000000, 0xcc5d291c00000000, + 0x78565eba00000000, 0xf668f97f00000000, 0x42638ed900000000, + 0xdf7966e800000000, 0x6b72114e00000000, 0x820259b800000000, + 0x36092e1e00000000, 0xab13c62f00000000, 0x1f18b18900000000, + 0x9126164c00000000, 0x252d61ea00000000, 0xb83789db00000000, + 0x0c3cfe7d00000000, 0x2bd068ec00000000, 0x9fdb1f4a00000000, + 0x02c1f77b00000000, 0xb6ca80dd00000000, 0x38f4271800000000, + 0x8cff50be00000000, 0x11e5b88f00000000, 0xa5eecf2900000000, + 0x4c9e87df00000000, 0xf895f07900000000, 0x658f184800000000, + 0xd1846fee00000000, 0x5fbac82b00000000, 0xebb1bf8d00000000, + 0x76ab57bc00000000, 0xc2a0201a00000000, 0xf2ea168800000000, + 0x46e1612e00000000, 0xdbfb891f00000000, 0x6ff0feb900000000, + 0xe1ce597c00000000, 0x55c52eda00000000, 0xc8dfc6eb00000000, + 0x7cd4b14d00000000, 0x95a4f9bb00000000, 0x21af8e1d00000000, + 0xbcb5662c00000000, 0x08be118a00000000, 0x8680b64f00000000, + 0x328bc1e900000000, 0xaf9129d800000000, 0x1b9a5e7e00000000, + 0x3c76c8ef00000000, 0x887dbf4900000000, 0x1567577800000000, + 0xa16c20de00000000, 0x2f52871b00000000, 0x9b59f0bd00000000, + 0x0643188c00000000, 0xb2486f2a00000000, 0x5b3827dc00000000, + 0xef33507a00000000, 0x7229b84b00000000, 0xc622cfed00000000, + 0x481c682800000000, 0xfc171f8e00000000, 0x610df7bf00000000, + 0xd506801900000000, 0x6ed3ab4700000000, 0xdad8dce100000000, + 0x47c234d000000000, 0xf3c9437600000000, 0x7df7e4b300000000, + 0xc9fc931500000000, 0x54e67b2400000000, 0xe0ed0c8200000000, + 0x099d447400000000, 0xbd9633d200000000, 0x208cdbe300000000, + 
0x9487ac4500000000, 0x1ab90b8000000000, 0xaeb27c2600000000, + 0x33a8941700000000, 0x87a3e3b100000000, 0xa04f752000000000, + 0x1444028600000000, 0x895eeab700000000, 0x3d559d1100000000, + 0xb36b3ad400000000, 0x07604d7200000000, 0x9a7aa54300000000, + 0x2e71d2e500000000, 0xc7019a1300000000, 0x730aedb500000000, + 0xee10058400000000, 0x5a1b722200000000, 0xd425d5e700000000, + 0x602ea24100000000, 0xfd344a7000000000, 0x493f3dd600000000, + 0x8b9f1dcc00000000, 0x3f946a6a00000000, 0xa28e825b00000000, + 0x1685f5fd00000000, 0x98bb523800000000, 0x2cb0259e00000000, + 0xb1aacdaf00000000, 0x05a1ba0900000000, 0xecd1f2ff00000000, + 0x58da855900000000, 0xc5c06d6800000000, 0x71cb1ace00000000, + 0xfff5bd0b00000000, 0x4bfecaad00000000, 0xd6e4229c00000000, + 0x62ef553a00000000, 0x4503c3ab00000000, 0xf108b40d00000000, + 0x6c125c3c00000000, 0xd8192b9a00000000, 0x56278c5f00000000, + 0xe22cfbf900000000, 0x7f3613c800000000, 0xcb3d646e00000000, + 0x224d2c9800000000, 0x96465b3e00000000, 0x0b5cb30f00000000, + 0xbf57c4a900000000, 0x3169636c00000000, 0x856214ca00000000, + 0x1878fcfb00000000, 0xac738b5d00000000, 0x17a6a00300000000, + 0xa3add7a500000000, 0x3eb73f9400000000, 0x8abc483200000000, + 0x0482eff700000000, 0xb089985100000000, 0x2d93706000000000, + 0x999807c600000000, 0x70e84f3000000000, 0xc4e3389600000000, + 0x59f9d0a700000000, 0xedf2a70100000000, 0x63cc00c400000000, + 0xd7c7776200000000, 0x4add9f5300000000, 0xfed6e8f500000000, + 0xd93a7e6400000000, 0x6d3109c200000000, 0xf02be1f300000000, + 0x4420965500000000, 0xca1e319000000000, 0x7e15463600000000, + 0xe30fae0700000000, 0x5704d9a100000000, 0xbe74915700000000, + 0x0a7fe6f100000000, 0x97650ec000000000, 0x236e796600000000, + 0xad50dea300000000, 0x195ba90500000000, 0x8441413400000000, + 0x304a369200000000}, + {0x0000000000000000, 0x9e00aacc00000000, 0x7d07254200000000, + 0xe3078f8e00000000, 0xfa0e4a8400000000, 0x640ee04800000000, + 0x87096fc600000000, 0x1909c50a00000000, 0xb51be5d300000000, + 0x2b1b4f1f00000000, 0xc81cc09100000000, 0x561c6a5d00000000, + 0x4f15af5700000000, 0xd115059b00000000, 0x32128a1500000000, + 0xac1220d900000000, 0x2b31bb7c00000000, 0xb53111b000000000, + 0x56369e3e00000000, 0xc83634f200000000, 0xd13ff1f800000000, + 0x4f3f5b3400000000, 0xac38d4ba00000000, 0x32387e7600000000, + 0x9e2a5eaf00000000, 0x002af46300000000, 0xe32d7bed00000000, + 0x7d2dd12100000000, 0x6424142b00000000, 0xfa24bee700000000, + 0x1923316900000000, 0x87239ba500000000, 0x566276f900000000, + 0xc862dc3500000000, 0x2b6553bb00000000, 0xb565f97700000000, + 0xac6c3c7d00000000, 0x326c96b100000000, 0xd16b193f00000000, + 0x4f6bb3f300000000, 0xe379932a00000000, 0x7d7939e600000000, + 0x9e7eb66800000000, 0x007e1ca400000000, 0x1977d9ae00000000, + 0x8777736200000000, 0x6470fcec00000000, 0xfa70562000000000, + 0x7d53cd8500000000, 0xe353674900000000, 0x0054e8c700000000, + 0x9e54420b00000000, 0x875d870100000000, 0x195d2dcd00000000, + 0xfa5aa24300000000, 0x645a088f00000000, 0xc848285600000000, + 0x5648829a00000000, 0xb54f0d1400000000, 0x2b4fa7d800000000, + 0x324662d200000000, 0xac46c81e00000000, 0x4f41479000000000, + 0xd141ed5c00000000, 0xedc29d2900000000, 0x73c237e500000000, + 0x90c5b86b00000000, 0x0ec512a700000000, 0x17ccd7ad00000000, + 0x89cc7d6100000000, 0x6acbf2ef00000000, 0xf4cb582300000000, + 0x58d978fa00000000, 0xc6d9d23600000000, 0x25de5db800000000, + 0xbbdef77400000000, 0xa2d7327e00000000, 0x3cd798b200000000, + 0xdfd0173c00000000, 0x41d0bdf000000000, 0xc6f3265500000000, + 0x58f38c9900000000, 0xbbf4031700000000, 0x25f4a9db00000000, + 0x3cfd6cd100000000, 0xa2fdc61d00000000, 
0x41fa499300000000, + 0xdffae35f00000000, 0x73e8c38600000000, 0xede8694a00000000, + 0x0eefe6c400000000, 0x90ef4c0800000000, 0x89e6890200000000, + 0x17e623ce00000000, 0xf4e1ac4000000000, 0x6ae1068c00000000, + 0xbba0ebd000000000, 0x25a0411c00000000, 0xc6a7ce9200000000, + 0x58a7645e00000000, 0x41aea15400000000, 0xdfae0b9800000000, + 0x3ca9841600000000, 0xa2a92eda00000000, 0x0ebb0e0300000000, + 0x90bba4cf00000000, 0x73bc2b4100000000, 0xedbc818d00000000, + 0xf4b5448700000000, 0x6ab5ee4b00000000, 0x89b261c500000000, + 0x17b2cb0900000000, 0x909150ac00000000, 0x0e91fa6000000000, + 0xed9675ee00000000, 0x7396df2200000000, 0x6a9f1a2800000000, + 0xf49fb0e400000000, 0x17983f6a00000000, 0x899895a600000000, + 0x258ab57f00000000, 0xbb8a1fb300000000, 0x588d903d00000000, + 0xc68d3af100000000, 0xdf84fffb00000000, 0x4184553700000000, + 0xa283dab900000000, 0x3c83707500000000, 0xda853b5300000000, + 0x4485919f00000000, 0xa7821e1100000000, 0x3982b4dd00000000, + 0x208b71d700000000, 0xbe8bdb1b00000000, 0x5d8c549500000000, + 0xc38cfe5900000000, 0x6f9ede8000000000, 0xf19e744c00000000, + 0x1299fbc200000000, 0x8c99510e00000000, 0x9590940400000000, + 0x0b903ec800000000, 0xe897b14600000000, 0x76971b8a00000000, + 0xf1b4802f00000000, 0x6fb42ae300000000, 0x8cb3a56d00000000, + 0x12b30fa100000000, 0x0bbacaab00000000, 0x95ba606700000000, + 0x76bdefe900000000, 0xe8bd452500000000, 0x44af65fc00000000, + 0xdaafcf3000000000, 0x39a840be00000000, 0xa7a8ea7200000000, + 0xbea12f7800000000, 0x20a185b400000000, 0xc3a60a3a00000000, + 0x5da6a0f600000000, 0x8ce74daa00000000, 0x12e7e76600000000, + 0xf1e068e800000000, 0x6fe0c22400000000, 0x76e9072e00000000, + 0xe8e9ade200000000, 0x0bee226c00000000, 0x95ee88a000000000, + 0x39fca87900000000, 0xa7fc02b500000000, 0x44fb8d3b00000000, + 0xdafb27f700000000, 0xc3f2e2fd00000000, 0x5df2483100000000, + 0xbef5c7bf00000000, 0x20f56d7300000000, 0xa7d6f6d600000000, + 0x39d65c1a00000000, 0xdad1d39400000000, 0x44d1795800000000, + 0x5dd8bc5200000000, 0xc3d8169e00000000, 0x20df991000000000, + 0xbedf33dc00000000, 0x12cd130500000000, 0x8ccdb9c900000000, + 0x6fca364700000000, 0xf1ca9c8b00000000, 0xe8c3598100000000, + 0x76c3f34d00000000, 0x95c47cc300000000, 0x0bc4d60f00000000, + 0x3747a67a00000000, 0xa9470cb600000000, 0x4a40833800000000, + 0xd44029f400000000, 0xcd49ecfe00000000, 0x5349463200000000, + 0xb04ec9bc00000000, 0x2e4e637000000000, 0x825c43a900000000, + 0x1c5ce96500000000, 0xff5b66eb00000000, 0x615bcc2700000000, + 0x7852092d00000000, 0xe652a3e100000000, 0x05552c6f00000000, + 0x9b5586a300000000, 0x1c761d0600000000, 0x8276b7ca00000000, + 0x6171384400000000, 0xff71928800000000, 0xe678578200000000, + 0x7878fd4e00000000, 0x9b7f72c000000000, 0x057fd80c00000000, + 0xa96df8d500000000, 0x376d521900000000, 0xd46add9700000000, + 0x4a6a775b00000000, 0x5363b25100000000, 0xcd63189d00000000, + 0x2e64971300000000, 0xb0643ddf00000000, 0x6125d08300000000, + 0xff257a4f00000000, 0x1c22f5c100000000, 0x82225f0d00000000, + 0x9b2b9a0700000000, 0x052b30cb00000000, 0xe62cbf4500000000, + 0x782c158900000000, 0xd43e355000000000, 0x4a3e9f9c00000000, + 0xa939101200000000, 0x3739bade00000000, 0x2e307fd400000000, + 0xb030d51800000000, 0x53375a9600000000, 0xcd37f05a00000000, + 0x4a146bff00000000, 0xd414c13300000000, 0x37134ebd00000000, + 0xa913e47100000000, 0xb01a217b00000000, 0x2e1a8bb700000000, + 0xcd1d043900000000, 0x531daef500000000, 0xff0f8e2c00000000, + 0x610f24e000000000, 0x8208ab6e00000000, 0x1c0801a200000000, + 0x0501c4a800000000, 0x9b016e6400000000, 0x7806e1ea00000000, + 0xe6064b2600000000}}; + +#else /* W == 4 */ + +local const 
z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, + 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, + 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, + 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, + 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, + 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, + 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, + 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, + 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 0xd540a77d, + 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, + 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, + 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, + 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, + 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, + 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, + 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, + 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, + 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, + 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, + 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, + 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, + 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, + 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, + 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, + 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, + 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, + 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, + 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, + 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, + 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, + 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, + 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, + 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, + 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, + 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, + 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, + 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, + 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, + 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, + 0xde0506f1}, + {0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, + 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, + 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, + 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, + 0x13f798ff, 0x11b126a6, 
0x10734c91, 0x153c5a14, 0x14fe3023, + 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, + 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, + 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, + 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, + 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, + 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, + 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, + 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, + 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, + 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, + 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, + 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, + 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, + 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, + 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, + 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, + 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 0xfd13b8f0, + 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, + 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, + 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, + 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, + 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, + 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, + 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, + 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, + 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, + 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, + 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, + 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, + 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, + 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, + 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, + 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, + 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, + 0xbe9834ed}, + {0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, + 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, + 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, + 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, + 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, + 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, + 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, + 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, + 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, + 0x9007a034, 0x179fbcfb, 
0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, + 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 0x94158a01, 0x8d0ebb40, + 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, + 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, + 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, + 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, + 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, + 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, + 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, + 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, + 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, + 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, + 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, + 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, + 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, + 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, + 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, + 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, + 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, + 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, + 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, + 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, + 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, + 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, + 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, + 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, + 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 0xa4911b66, + 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, + 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, + 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, + 0x9324fd72}, + {0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, + 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, + 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, + 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, + 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, + 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, + 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, + 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, + 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, + 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, + 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, + 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, + 0x1b01a57b, 0x8208f4c1, 
0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, + 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, + 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, + 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, + 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, + 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, + 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, + 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, + 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, + 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, + 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, + 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, + 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, + 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, + 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, + 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, + 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, + 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, + 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, + 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, + 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, + 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, + 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, + 0x2d02ef8d}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x96300777, 0x2c610eee, 0xba510999, 0x19c46d07, + 0x8ff46a70, 0x35a563e9, 0xa395649e, 0x3288db0e, 0xa4b8dc79, + 0x1ee9d5e0, 0x88d9d297, 0x2b4cb609, 0xbd7cb17e, 0x072db8e7, + 0x911dbf90, 0x6410b71d, 0xf220b06a, 0x4871b9f3, 0xde41be84, + 0x7dd4da1a, 0xebe4dd6d, 0x51b5d4f4, 0xc785d383, 0x56986c13, + 0xc0a86b64, 0x7af962fd, 0xecc9658a, 0x4f5c0114, 0xd96c0663, + 0x633d0ffa, 0xf50d088d, 0xc8206e3b, 0x5e10694c, 0xe44160d5, + 0x727167a2, 0xd1e4033c, 0x47d4044b, 0xfd850dd2, 0x6bb50aa5, + 0xfaa8b535, 0x6c98b242, 0xd6c9bbdb, 0x40f9bcac, 0xe36cd832, + 0x755cdf45, 0xcf0dd6dc, 0x593dd1ab, 0xac30d926, 0x3a00de51, + 0x8051d7c8, 0x1661d0bf, 0xb5f4b421, 0x23c4b356, 0x9995bacf, + 0x0fa5bdb8, 0x9eb80228, 0x0888055f, 0xb2d90cc6, 0x24e90bb1, + 0x877c6f2f, 0x114c6858, 0xab1d61c1, 0x3d2d66b6, 0x9041dc76, + 0x0671db01, 0xbc20d298, 0x2a10d5ef, 0x8985b171, 0x1fb5b606, + 0xa5e4bf9f, 0x33d4b8e8, 0xa2c90778, 0x34f9000f, 0x8ea80996, + 0x18980ee1, 0xbb0d6a7f, 0x2d3d6d08, 0x976c6491, 0x015c63e6, + 0xf4516b6b, 0x62616c1c, 0xd8306585, 0x4e0062f2, 0xed95066c, + 0x7ba5011b, 0xc1f40882, 0x57c40ff5, 0xc6d9b065, 0x50e9b712, + 0xeab8be8b, 0x7c88b9fc, 0xdf1ddd62, 0x492dda15, 0xf37cd38c, + 0x654cd4fb, 0x5861b24d, 0xce51b53a, 0x7400bca3, 0xe230bbd4, + 0x41a5df4a, 0xd795d83d, 0x6dc4d1a4, 0xfbf4d6d3, 0x6ae96943, + 0xfcd96e34, 0x468867ad, 0xd0b860da, 0x732d0444, 0xe51d0333, + 0x5f4c0aaa, 0xc97c0ddd, 
0x3c710550, 0xaa410227, 0x10100bbe, + 0x86200cc9, 0x25b56857, 0xb3856f20, 0x09d466b9, 0x9fe461ce, + 0x0ef9de5e, 0x98c9d929, 0x2298d0b0, 0xb4a8d7c7, 0x173db359, + 0x810db42e, 0x3b5cbdb7, 0xad6cbac0, 0x2083b8ed, 0xb6b3bf9a, + 0x0ce2b603, 0x9ad2b174, 0x3947d5ea, 0xaf77d29d, 0x1526db04, + 0x8316dc73, 0x120b63e3, 0x843b6494, 0x3e6a6d0d, 0xa85a6a7a, + 0x0bcf0ee4, 0x9dff0993, 0x27ae000a, 0xb19e077d, 0x44930ff0, + 0xd2a30887, 0x68f2011e, 0xfec20669, 0x5d5762f7, 0xcb676580, + 0x71366c19, 0xe7066b6e, 0x761bd4fe, 0xe02bd389, 0x5a7ada10, + 0xcc4add67, 0x6fdfb9f9, 0xf9efbe8e, 0x43beb717, 0xd58eb060, + 0xe8a3d6d6, 0x7e93d1a1, 0xc4c2d838, 0x52f2df4f, 0xf167bbd1, + 0x6757bca6, 0xdd06b53f, 0x4b36b248, 0xda2b0dd8, 0x4c1b0aaf, + 0xf64a0336, 0x607a0441, 0xc3ef60df, 0x55df67a8, 0xef8e6e31, + 0x79be6946, 0x8cb361cb, 0x1a8366bc, 0xa0d26f25, 0x36e26852, + 0x95770ccc, 0x03470bbb, 0xb9160222, 0x2f260555, 0xbe3bbac5, + 0x280bbdb2, 0x925ab42b, 0x046ab35c, 0xa7ffd7c2, 0x31cfd0b5, + 0x8b9ed92c, 0x1daede5b, 0xb0c2649b, 0x26f263ec, 0x9ca36a75, + 0x0a936d02, 0xa906099c, 0x3f360eeb, 0x85670772, 0x13570005, + 0x824abf95, 0x147ab8e2, 0xae2bb17b, 0x381bb60c, 0x9b8ed292, + 0x0dbed5e5, 0xb7efdc7c, 0x21dfdb0b, 0xd4d2d386, 0x42e2d4f1, + 0xf8b3dd68, 0x6e83da1f, 0xcd16be81, 0x5b26b9f6, 0xe177b06f, + 0x7747b718, 0xe65a0888, 0x706a0fff, 0xca3b0666, 0x5c0b0111, + 0xff9e658f, 0x69ae62f8, 0xd3ff6b61, 0x45cf6c16, 0x78e20aa0, + 0xeed20dd7, 0x5483044e, 0xc2b30339, 0x612667a7, 0xf71660d0, + 0x4d476949, 0xdb776e3e, 0x4a6ad1ae, 0xdc5ad6d9, 0x660bdf40, + 0xf03bd837, 0x53aebca9, 0xc59ebbde, 0x7fcfb247, 0xe9ffb530, + 0x1cf2bdbd, 0x8ac2baca, 0x3093b353, 0xa6a3b424, 0x0536d0ba, + 0x9306d7cd, 0x2957de54, 0xbf67d923, 0x2e7a66b3, 0xb84a61c4, + 0x021b685d, 0x942b6f2a, 0x37be0bb4, 0xa18e0cc3, 0x1bdf055a, + 0x8def022d}, + {0x00000000, 0x41311b19, 0x82623632, 0xc3532d2b, 0x04c56c64, + 0x45f4777d, 0x86a75a56, 0xc796414f, 0x088ad9c8, 0x49bbc2d1, + 0x8ae8effa, 0xcbd9f4e3, 0x0c4fb5ac, 0x4d7eaeb5, 0x8e2d839e, + 0xcf1c9887, 0x5112c24a, 0x1023d953, 0xd370f478, 0x9241ef61, + 0x55d7ae2e, 0x14e6b537, 0xd7b5981c, 0x96848305, 0x59981b82, + 0x18a9009b, 0xdbfa2db0, 0x9acb36a9, 0x5d5d77e6, 0x1c6c6cff, + 0xdf3f41d4, 0x9e0e5acd, 0xa2248495, 0xe3159f8c, 0x2046b2a7, + 0x6177a9be, 0xa6e1e8f1, 0xe7d0f3e8, 0x2483dec3, 0x65b2c5da, + 0xaaae5d5d, 0xeb9f4644, 0x28cc6b6f, 0x69fd7076, 0xae6b3139, + 0xef5a2a20, 0x2c09070b, 0x6d381c12, 0xf33646df, 0xb2075dc6, + 0x715470ed, 0x30656bf4, 0xf7f32abb, 0xb6c231a2, 0x75911c89, + 0x34a00790, 0xfbbc9f17, 0xba8d840e, 0x79dea925, 0x38efb23c, + 0xff79f373, 0xbe48e86a, 0x7d1bc541, 0x3c2ade58, 0x054f79f0, + 0x447e62e9, 0x872d4fc2, 0xc61c54db, 0x018a1594, 0x40bb0e8d, + 0x83e823a6, 0xc2d938bf, 0x0dc5a038, 0x4cf4bb21, 0x8fa7960a, + 0xce968d13, 0x0900cc5c, 0x4831d745, 0x8b62fa6e, 0xca53e177, + 0x545dbbba, 0x156ca0a3, 0xd63f8d88, 0x970e9691, 0x5098d7de, + 0x11a9ccc7, 0xd2fae1ec, 0x93cbfaf5, 0x5cd76272, 0x1de6796b, + 0xdeb55440, 0x9f844f59, 0x58120e16, 0x1923150f, 0xda703824, + 0x9b41233d, 0xa76bfd65, 0xe65ae67c, 0x2509cb57, 0x6438d04e, + 0xa3ae9101, 0xe29f8a18, 0x21cca733, 0x60fdbc2a, 0xafe124ad, + 0xeed03fb4, 0x2d83129f, 0x6cb20986, 0xab2448c9, 0xea1553d0, + 0x29467efb, 0x687765e2, 0xf6793f2f, 0xb7482436, 0x741b091d, + 0x352a1204, 0xf2bc534b, 0xb38d4852, 0x70de6579, 0x31ef7e60, + 0xfef3e6e7, 0xbfc2fdfe, 0x7c91d0d5, 0x3da0cbcc, 0xfa368a83, + 0xbb07919a, 0x7854bcb1, 0x3965a7a8, 0x4b98833b, 0x0aa99822, + 0xc9fab509, 0x88cbae10, 0x4f5def5f, 0x0e6cf446, 0xcd3fd96d, + 0x8c0ec274, 0x43125af3, 0x022341ea, 0xc1706cc1, 0x804177d8, + 0x47d73697, 0x06e62d8e, 
0xc5b500a5, 0x84841bbc, 0x1a8a4171, + 0x5bbb5a68, 0x98e87743, 0xd9d96c5a, 0x1e4f2d15, 0x5f7e360c, + 0x9c2d1b27, 0xdd1c003e, 0x120098b9, 0x533183a0, 0x9062ae8b, + 0xd153b592, 0x16c5f4dd, 0x57f4efc4, 0x94a7c2ef, 0xd596d9f6, + 0xe9bc07ae, 0xa88d1cb7, 0x6bde319c, 0x2aef2a85, 0xed796bca, + 0xac4870d3, 0x6f1b5df8, 0x2e2a46e1, 0xe136de66, 0xa007c57f, + 0x6354e854, 0x2265f34d, 0xe5f3b202, 0xa4c2a91b, 0x67918430, + 0x26a09f29, 0xb8aec5e4, 0xf99fdefd, 0x3accf3d6, 0x7bfde8cf, + 0xbc6ba980, 0xfd5ab299, 0x3e099fb2, 0x7f3884ab, 0xb0241c2c, + 0xf1150735, 0x32462a1e, 0x73773107, 0xb4e17048, 0xf5d06b51, + 0x3683467a, 0x77b25d63, 0x4ed7facb, 0x0fe6e1d2, 0xccb5ccf9, + 0x8d84d7e0, 0x4a1296af, 0x0b238db6, 0xc870a09d, 0x8941bb84, + 0x465d2303, 0x076c381a, 0xc43f1531, 0x850e0e28, 0x42984f67, + 0x03a9547e, 0xc0fa7955, 0x81cb624c, 0x1fc53881, 0x5ef42398, + 0x9da70eb3, 0xdc9615aa, 0x1b0054e5, 0x5a314ffc, 0x996262d7, + 0xd85379ce, 0x174fe149, 0x567efa50, 0x952dd77b, 0xd41ccc62, + 0x138a8d2d, 0x52bb9634, 0x91e8bb1f, 0xd0d9a006, 0xecf37e5e, + 0xadc26547, 0x6e91486c, 0x2fa05375, 0xe836123a, 0xa9070923, + 0x6a542408, 0x2b653f11, 0xe479a796, 0xa548bc8f, 0x661b91a4, + 0x272a8abd, 0xe0bccbf2, 0xa18dd0eb, 0x62defdc0, 0x23efe6d9, + 0xbde1bc14, 0xfcd0a70d, 0x3f838a26, 0x7eb2913f, 0xb924d070, + 0xf815cb69, 0x3b46e642, 0x7a77fd5b, 0xb56b65dc, 0xf45a7ec5, + 0x370953ee, 0x763848f7, 0xb1ae09b8, 0xf09f12a1, 0x33cc3f8a, + 0x72fd2493}, + {0x00000000, 0x376ac201, 0x6ed48403, 0x59be4602, 0xdca80907, + 0xebc2cb06, 0xb27c8d04, 0x85164f05, 0xb851130e, 0x8f3bd10f, + 0xd685970d, 0xe1ef550c, 0x64f91a09, 0x5393d808, 0x0a2d9e0a, + 0x3d475c0b, 0x70a3261c, 0x47c9e41d, 0x1e77a21f, 0x291d601e, + 0xac0b2f1b, 0x9b61ed1a, 0xc2dfab18, 0xf5b56919, 0xc8f23512, + 0xff98f713, 0xa626b111, 0x914c7310, 0x145a3c15, 0x2330fe14, + 0x7a8eb816, 0x4de47a17, 0xe0464d38, 0xd72c8f39, 0x8e92c93b, + 0xb9f80b3a, 0x3cee443f, 0x0b84863e, 0x523ac03c, 0x6550023d, + 0x58175e36, 0x6f7d9c37, 0x36c3da35, 0x01a91834, 0x84bf5731, + 0xb3d59530, 0xea6bd332, 0xdd011133, 0x90e56b24, 0xa78fa925, + 0xfe31ef27, 0xc95b2d26, 0x4c4d6223, 0x7b27a022, 0x2299e620, + 0x15f32421, 0x28b4782a, 0x1fdeba2b, 0x4660fc29, 0x710a3e28, + 0xf41c712d, 0xc376b32c, 0x9ac8f52e, 0xada2372f, 0xc08d9a70, + 0xf7e75871, 0xae591e73, 0x9933dc72, 0x1c259377, 0x2b4f5176, + 0x72f11774, 0x459bd575, 0x78dc897e, 0x4fb64b7f, 0x16080d7d, + 0x2162cf7c, 0xa4748079, 0x931e4278, 0xcaa0047a, 0xfdcac67b, + 0xb02ebc6c, 0x87447e6d, 0xdefa386f, 0xe990fa6e, 0x6c86b56b, + 0x5bec776a, 0x02523168, 0x3538f369, 0x087faf62, 0x3f156d63, + 0x66ab2b61, 0x51c1e960, 0xd4d7a665, 0xe3bd6464, 0xba032266, + 0x8d69e067, 0x20cbd748, 0x17a11549, 0x4e1f534b, 0x7975914a, + 0xfc63de4f, 0xcb091c4e, 0x92b75a4c, 0xa5dd984d, 0x989ac446, + 0xaff00647, 0xf64e4045, 0xc1248244, 0x4432cd41, 0x73580f40, + 0x2ae64942, 0x1d8c8b43, 0x5068f154, 0x67023355, 0x3ebc7557, + 0x09d6b756, 0x8cc0f853, 0xbbaa3a52, 0xe2147c50, 0xd57ebe51, + 0xe839e25a, 0xdf53205b, 0x86ed6659, 0xb187a458, 0x3491eb5d, + 0x03fb295c, 0x5a456f5e, 0x6d2fad5f, 0x801b35e1, 0xb771f7e0, + 0xeecfb1e2, 0xd9a573e3, 0x5cb33ce6, 0x6bd9fee7, 0x3267b8e5, + 0x050d7ae4, 0x384a26ef, 0x0f20e4ee, 0x569ea2ec, 0x61f460ed, + 0xe4e22fe8, 0xd388ede9, 0x8a36abeb, 0xbd5c69ea, 0xf0b813fd, + 0xc7d2d1fc, 0x9e6c97fe, 0xa90655ff, 0x2c101afa, 0x1b7ad8fb, + 0x42c49ef9, 0x75ae5cf8, 0x48e900f3, 0x7f83c2f2, 0x263d84f0, + 0x115746f1, 0x944109f4, 0xa32bcbf5, 0xfa958df7, 0xcdff4ff6, + 0x605d78d9, 0x5737bad8, 0x0e89fcda, 0x39e33edb, 0xbcf571de, + 0x8b9fb3df, 0xd221f5dd, 0xe54b37dc, 0xd80c6bd7, 0xef66a9d6, + 0xb6d8efd4, 0x81b22dd5, 
0x04a462d0, 0x33cea0d1, 0x6a70e6d3, + 0x5d1a24d2, 0x10fe5ec5, 0x27949cc4, 0x7e2adac6, 0x494018c7, + 0xcc5657c2, 0xfb3c95c3, 0xa282d3c1, 0x95e811c0, 0xa8af4dcb, + 0x9fc58fca, 0xc67bc9c8, 0xf1110bc9, 0x740744cc, 0x436d86cd, + 0x1ad3c0cf, 0x2db902ce, 0x4096af91, 0x77fc6d90, 0x2e422b92, + 0x1928e993, 0x9c3ea696, 0xab546497, 0xf2ea2295, 0xc580e094, + 0xf8c7bc9f, 0xcfad7e9e, 0x9613389c, 0xa179fa9d, 0x246fb598, + 0x13057799, 0x4abb319b, 0x7dd1f39a, 0x3035898d, 0x075f4b8c, + 0x5ee10d8e, 0x698bcf8f, 0xec9d808a, 0xdbf7428b, 0x82490489, + 0xb523c688, 0x88649a83, 0xbf0e5882, 0xe6b01e80, 0xd1dadc81, + 0x54cc9384, 0x63a65185, 0x3a181787, 0x0d72d586, 0xa0d0e2a9, + 0x97ba20a8, 0xce0466aa, 0xf96ea4ab, 0x7c78ebae, 0x4b1229af, + 0x12ac6fad, 0x25c6adac, 0x1881f1a7, 0x2feb33a6, 0x765575a4, + 0x413fb7a5, 0xc429f8a0, 0xf3433aa1, 0xaafd7ca3, 0x9d97bea2, + 0xd073c4b5, 0xe71906b4, 0xbea740b6, 0x89cd82b7, 0x0cdbcdb2, + 0x3bb10fb3, 0x620f49b1, 0x55658bb0, 0x6822d7bb, 0x5f4815ba, + 0x06f653b8, 0x319c91b9, 0xb48adebc, 0x83e01cbd, 0xda5e5abf, + 0xed3498be}, + {0x00000000, 0x6567bcb8, 0x8bc809aa, 0xeeafb512, 0x5797628f, + 0x32f0de37, 0xdc5f6b25, 0xb938d79d, 0xef28b4c5, 0x8a4f087d, + 0x64e0bd6f, 0x018701d7, 0xb8bfd64a, 0xddd86af2, 0x3377dfe0, + 0x56106358, 0x9f571950, 0xfa30a5e8, 0x149f10fa, 0x71f8ac42, + 0xc8c07bdf, 0xada7c767, 0x43087275, 0x266fcecd, 0x707fad95, + 0x1518112d, 0xfbb7a43f, 0x9ed01887, 0x27e8cf1a, 0x428f73a2, + 0xac20c6b0, 0xc9477a08, 0x3eaf32a0, 0x5bc88e18, 0xb5673b0a, + 0xd00087b2, 0x6938502f, 0x0c5fec97, 0xe2f05985, 0x8797e53d, + 0xd1878665, 0xb4e03add, 0x5a4f8fcf, 0x3f283377, 0x8610e4ea, + 0xe3775852, 0x0dd8ed40, 0x68bf51f8, 0xa1f82bf0, 0xc49f9748, + 0x2a30225a, 0x4f579ee2, 0xf66f497f, 0x9308f5c7, 0x7da740d5, + 0x18c0fc6d, 0x4ed09f35, 0x2bb7238d, 0xc518969f, 0xa07f2a27, + 0x1947fdba, 0x7c204102, 0x928ff410, 0xf7e848a8, 0x3d58149b, + 0x583fa823, 0xb6901d31, 0xd3f7a189, 0x6acf7614, 0x0fa8caac, + 0xe1077fbe, 0x8460c306, 0xd270a05e, 0xb7171ce6, 0x59b8a9f4, + 0x3cdf154c, 0x85e7c2d1, 0xe0807e69, 0x0e2fcb7b, 0x6b4877c3, + 0xa20f0dcb, 0xc768b173, 0x29c70461, 0x4ca0b8d9, 0xf5986f44, + 0x90ffd3fc, 0x7e5066ee, 0x1b37da56, 0x4d27b90e, 0x284005b6, + 0xc6efb0a4, 0xa3880c1c, 0x1ab0db81, 0x7fd76739, 0x9178d22b, + 0xf41f6e93, 0x03f7263b, 0x66909a83, 0x883f2f91, 0xed589329, + 0x546044b4, 0x3107f80c, 0xdfa84d1e, 0xbacff1a6, 0xecdf92fe, + 0x89b82e46, 0x67179b54, 0x027027ec, 0xbb48f071, 0xde2f4cc9, + 0x3080f9db, 0x55e74563, 0x9ca03f6b, 0xf9c783d3, 0x176836c1, + 0x720f8a79, 0xcb375de4, 0xae50e15c, 0x40ff544e, 0x2598e8f6, + 0x73888bae, 0x16ef3716, 0xf8408204, 0x9d273ebc, 0x241fe921, + 0x41785599, 0xafd7e08b, 0xcab05c33, 0x3bb659ed, 0x5ed1e555, + 0xb07e5047, 0xd519ecff, 0x6c213b62, 0x094687da, 0xe7e932c8, + 0x828e8e70, 0xd49eed28, 0xb1f95190, 0x5f56e482, 0x3a31583a, + 0x83098fa7, 0xe66e331f, 0x08c1860d, 0x6da63ab5, 0xa4e140bd, + 0xc186fc05, 0x2f294917, 0x4a4ef5af, 0xf3762232, 0x96119e8a, + 0x78be2b98, 0x1dd99720, 0x4bc9f478, 0x2eae48c0, 0xc001fdd2, + 0xa566416a, 0x1c5e96f7, 0x79392a4f, 0x97969f5d, 0xf2f123e5, + 0x05196b4d, 0x607ed7f5, 0x8ed162e7, 0xebb6de5f, 0x528e09c2, + 0x37e9b57a, 0xd9460068, 0xbc21bcd0, 0xea31df88, 0x8f566330, + 0x61f9d622, 0x049e6a9a, 0xbda6bd07, 0xd8c101bf, 0x366eb4ad, + 0x53090815, 0x9a4e721d, 0xff29cea5, 0x11867bb7, 0x74e1c70f, + 0xcdd91092, 0xa8beac2a, 0x46111938, 0x2376a580, 0x7566c6d8, + 0x10017a60, 0xfeaecf72, 0x9bc973ca, 0x22f1a457, 0x479618ef, + 0xa939adfd, 0xcc5e1145, 0x06ee4d76, 0x6389f1ce, 0x8d2644dc, + 0xe841f864, 0x51792ff9, 0x341e9341, 0xdab12653, 0xbfd69aeb, + 0xe9c6f9b3, 0x8ca1450b, 
0x620ef019, 0x07694ca1, 0xbe519b3c, + 0xdb362784, 0x35999296, 0x50fe2e2e, 0x99b95426, 0xfcdee89e, + 0x12715d8c, 0x7716e134, 0xce2e36a9, 0xab498a11, 0x45e63f03, + 0x208183bb, 0x7691e0e3, 0x13f65c5b, 0xfd59e949, 0x983e55f1, + 0x2106826c, 0x44613ed4, 0xaace8bc6, 0xcfa9377e, 0x38417fd6, + 0x5d26c36e, 0xb389767c, 0xd6eecac4, 0x6fd61d59, 0x0ab1a1e1, + 0xe41e14f3, 0x8179a84b, 0xd769cb13, 0xb20e77ab, 0x5ca1c2b9, + 0x39c67e01, 0x80fea99c, 0xe5991524, 0x0b36a036, 0x6e511c8e, + 0xa7166686, 0xc271da3e, 0x2cde6f2c, 0x49b9d394, 0xf0810409, + 0x95e6b8b1, 0x7b490da3, 0x1e2eb11b, 0x483ed243, 0x2d596efb, + 0xc3f6dbe9, 0xa6916751, 0x1fa9b0cc, 0x7ace0c74, 0x9461b966, + 0xf10605de}}; + +#endif + +#endif + +#if N == 2 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 
0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 
0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 
0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}, + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 
0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 
0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 
0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 
0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43147b1700000000, 0x8628f62e00000000, + 0xc53c8d3900000000, 0x0c51ec5d00000000, 0x4f45974a00000000, + 0x8a791a7300000000, 0xc96d616400000000, 0x18a2d8bb00000000, + 0x5bb6a3ac00000000, 0x9e8a2e9500000000, 0xdd9e558200000000, + 0x14f334e600000000, 0x57e74ff100000000, 0x92dbc2c800000000, + 0xd1cfb9df00000000, 0x7142c0ac00000000, 0x3256bbbb00000000, + 0xf76a368200000000, 0xb47e4d9500000000, 0x7d132cf100000000, + 0x3e0757e600000000, 0xfb3bdadf00000000, 0xb82fa1c800000000, + 0x69e0181700000000, 0x2af4630000000000, 0xefc8ee3900000000, + 0xacdc952e00000000, 0x65b1f44a00000000, 0x26a58f5d00000000, + 0xe399026400000000, 0xa08d797300000000, 0xa382f18200000000, + 0xe0968a9500000000, 0x25aa07ac00000000, 0x66be7cbb00000000, + 0xafd31ddf00000000, 0xecc766c800000000, 0x29fbebf100000000, + 0x6aef90e600000000, 0xbb20293900000000, 0xf834522e00000000, + 0x3d08df1700000000, 0x7e1ca40000000000, 0xb771c56400000000, + 0xf465be7300000000, 0x3159334a00000000, 0x724d485d00000000, + 0xd2c0312e00000000, 0x91d44a3900000000, 0x54e8c70000000000, + 0x17fcbc1700000000, 0xde91dd7300000000, 0x9d85a66400000000, + 0x58b92b5d00000000, 0x1bad504a00000000, 0xca62e99500000000, + 0x8976928200000000, 0x4c4a1fbb00000000, 0x0f5e64ac00000000, + 0xc63305c800000000, 0x85277edf00000000, 0x401bf3e600000000, + 0x030f88f100000000, 0x070392de00000000, 0x4417e9c900000000, + 0x812b64f000000000, 0xc23f1fe700000000, 0x0b527e8300000000, + 0x4846059400000000, 0x8d7a88ad00000000, 0xce6ef3ba00000000, + 0x1fa14a6500000000, 0x5cb5317200000000, 0x9989bc4b00000000, + 0xda9dc75c00000000, 0x13f0a63800000000, 0x50e4dd2f00000000, + 0x95d8501600000000, 0xd6cc2b0100000000, 0x7641527200000000, + 0x3555296500000000, 0xf069a45c00000000, 0xb37ddf4b00000000, + 0x7a10be2f00000000, 0x3904c53800000000, 0xfc38480100000000, + 0xbf2c331600000000, 0x6ee38ac900000000, 0x2df7f1de00000000, + 0xe8cb7ce700000000, 0xabdf07f000000000, 0x62b2669400000000, + 0x21a61d8300000000, 0xe49a90ba00000000, 0xa78eebad00000000, + 0xa481635c00000000, 0xe795184b00000000, 0x22a9957200000000, + 0x61bdee6500000000, 0xa8d08f0100000000, 0xebc4f41600000000, + 0x2ef8792f00000000, 0x6dec023800000000, 
0xbc23bbe700000000, + 0xff37c0f000000000, 0x3a0b4dc900000000, 0x791f36de00000000, + 0xb07257ba00000000, 0xf3662cad00000000, 0x365aa19400000000, + 0x754eda8300000000, 0xd5c3a3f000000000, 0x96d7d8e700000000, + 0x53eb55de00000000, 0x10ff2ec900000000, 0xd9924fad00000000, + 0x9a8634ba00000000, 0x5fbab98300000000, 0x1caec29400000000, + 0xcd617b4b00000000, 0x8e75005c00000000, 0x4b498d6500000000, + 0x085df67200000000, 0xc130971600000000, 0x8224ec0100000000, + 0x4718613800000000, 0x040c1a2f00000000, 0x4f00556600000000, + 0x0c142e7100000000, 0xc928a34800000000, 0x8a3cd85f00000000, + 0x4351b93b00000000, 0x0045c22c00000000, 0xc5794f1500000000, + 0x866d340200000000, 0x57a28ddd00000000, 0x14b6f6ca00000000, + 0xd18a7bf300000000, 0x929e00e400000000, 0x5bf3618000000000, + 0x18e71a9700000000, 0xdddb97ae00000000, 0x9ecfecb900000000, + 0x3e4295ca00000000, 0x7d56eedd00000000, 0xb86a63e400000000, + 0xfb7e18f300000000, 0x3213799700000000, 0x7107028000000000, + 0xb43b8fb900000000, 0xf72ff4ae00000000, 0x26e04d7100000000, + 0x65f4366600000000, 0xa0c8bb5f00000000, 0xe3dcc04800000000, + 0x2ab1a12c00000000, 0x69a5da3b00000000, 0xac99570200000000, + 0xef8d2c1500000000, 0xec82a4e400000000, 0xaf96dff300000000, + 0x6aaa52ca00000000, 0x29be29dd00000000, 0xe0d348b900000000, + 0xa3c733ae00000000, 0x66fbbe9700000000, 0x25efc58000000000, + 0xf4207c5f00000000, 0xb734074800000000, 0x72088a7100000000, + 0x311cf16600000000, 0xf871900200000000, 0xbb65eb1500000000, + 0x7e59662c00000000, 0x3d4d1d3b00000000, 0x9dc0644800000000, + 0xded41f5f00000000, 0x1be8926600000000, 0x58fce97100000000, + 0x9191881500000000, 0xd285f30200000000, 0x17b97e3b00000000, + 0x54ad052c00000000, 0x8562bcf300000000, 0xc676c7e400000000, + 0x034a4add00000000, 0x405e31ca00000000, 0x893350ae00000000, + 0xca272bb900000000, 0x0f1ba68000000000, 0x4c0fdd9700000000, + 0x4803c7b800000000, 0x0b17bcaf00000000, 0xce2b319600000000, + 0x8d3f4a8100000000, 0x44522be500000000, 0x074650f200000000, + 0xc27addcb00000000, 0x816ea6dc00000000, 0x50a11f0300000000, + 0x13b5641400000000, 0xd689e92d00000000, 0x959d923a00000000, + 0x5cf0f35e00000000, 0x1fe4884900000000, 0xdad8057000000000, + 0x99cc7e6700000000, 0x3941071400000000, 0x7a557c0300000000, + 0xbf69f13a00000000, 0xfc7d8a2d00000000, 0x3510eb4900000000, + 0x7604905e00000000, 0xb3381d6700000000, 0xf02c667000000000, + 0x21e3dfaf00000000, 0x62f7a4b800000000, 0xa7cb298100000000, + 0xe4df529600000000, 0x2db233f200000000, 0x6ea648e500000000, + 0xab9ac5dc00000000, 0xe88ebecb00000000, 0xeb81363a00000000, + 0xa8954d2d00000000, 0x6da9c01400000000, 0x2ebdbb0300000000, + 0xe7d0da6700000000, 0xa4c4a17000000000, 0x61f82c4900000000, + 0x22ec575e00000000, 0xf323ee8100000000, 0xb037959600000000, + 0x750b18af00000000, 0x361f63b800000000, 0xff7202dc00000000, + 0xbc6679cb00000000, 0x795af4f200000000, 0x3a4e8fe500000000, + 0x9ac3f69600000000, 0xd9d78d8100000000, 0x1ceb00b800000000, + 0x5fff7baf00000000, 0x96921acb00000000, 0xd58661dc00000000, + 0x10baece500000000, 0x53ae97f200000000, 0x82612e2d00000000, + 0xc175553a00000000, 0x0449d80300000000, 0x475da31400000000, + 0x8e30c27000000000, 0xcd24b96700000000, 0x0818345e00000000, + 0x4b0c4f4900000000}, + {0x0000000000000000, 0x3e6bc2ef00000000, 0x3dd0f50400000000, + 0x03bb37eb00000000, 0x7aa0eb0900000000, 0x44cb29e600000000, + 0x47701e0d00000000, 0x791bdce200000000, 0xf440d71300000000, + 0xca2b15fc00000000, 0xc990221700000000, 0xf7fbe0f800000000, + 0x8ee03c1a00000000, 0xb08bfef500000000, 0xb330c91e00000000, + 0x8d5b0bf100000000, 0xe881ae2700000000, 0xd6ea6cc800000000, + 0xd5515b2300000000, 
0xeb3a99cc00000000, 0x9221452e00000000, + 0xac4a87c100000000, 0xaff1b02a00000000, 0x919a72c500000000, + 0x1cc1793400000000, 0x22aabbdb00000000, 0x21118c3000000000, + 0x1f7a4edf00000000, 0x6661923d00000000, 0x580a50d200000000, + 0x5bb1673900000000, 0x65daa5d600000000, 0xd0035d4f00000000, + 0xee689fa000000000, 0xedd3a84b00000000, 0xd3b86aa400000000, + 0xaaa3b64600000000, 0x94c874a900000000, 0x9773434200000000, + 0xa91881ad00000000, 0x24438a5c00000000, 0x1a2848b300000000, + 0x19937f5800000000, 0x27f8bdb700000000, 0x5ee3615500000000, + 0x6088a3ba00000000, 0x6333945100000000, 0x5d5856be00000000, + 0x3882f36800000000, 0x06e9318700000000, 0x0552066c00000000, + 0x3b39c48300000000, 0x4222186100000000, 0x7c49da8e00000000, + 0x7ff2ed6500000000, 0x41992f8a00000000, 0xccc2247b00000000, + 0xf2a9e69400000000, 0xf112d17f00000000, 0xcf79139000000000, + 0xb662cf7200000000, 0x88090d9d00000000, 0x8bb23a7600000000, + 0xb5d9f89900000000, 0xa007ba9e00000000, 0x9e6c787100000000, + 0x9dd74f9a00000000, 0xa3bc8d7500000000, 0xdaa7519700000000, + 0xe4cc937800000000, 0xe777a49300000000, 0xd91c667c00000000, + 0x54476d8d00000000, 0x6a2caf6200000000, 0x6997988900000000, + 0x57fc5a6600000000, 0x2ee7868400000000, 0x108c446b00000000, + 0x1337738000000000, 0x2d5cb16f00000000, 0x488614b900000000, + 0x76edd65600000000, 0x7556e1bd00000000, 0x4b3d235200000000, + 0x3226ffb000000000, 0x0c4d3d5f00000000, 0x0ff60ab400000000, + 0x319dc85b00000000, 0xbcc6c3aa00000000, 0x82ad014500000000, + 0x811636ae00000000, 0xbf7df44100000000, 0xc66628a300000000, + 0xf80dea4c00000000, 0xfbb6dda700000000, 0xc5dd1f4800000000, + 0x7004e7d100000000, 0x4e6f253e00000000, 0x4dd412d500000000, + 0x73bfd03a00000000, 0x0aa40cd800000000, 0x34cfce3700000000, + 0x3774f9dc00000000, 0x091f3b3300000000, 0x844430c200000000, + 0xba2ff22d00000000, 0xb994c5c600000000, 0x87ff072900000000, + 0xfee4dbcb00000000, 0xc08f192400000000, 0xc3342ecf00000000, + 0xfd5fec2000000000, 0x988549f600000000, 0xa6ee8b1900000000, + 0xa555bcf200000000, 0x9b3e7e1d00000000, 0xe225a2ff00000000, + 0xdc4e601000000000, 0xdff557fb00000000, 0xe19e951400000000, + 0x6cc59ee500000000, 0x52ae5c0a00000000, 0x51156be100000000, + 0x6f7ea90e00000000, 0x166575ec00000000, 0x280eb70300000000, + 0x2bb580e800000000, 0x15de420700000000, 0x010905e600000000, + 0x3f62c70900000000, 0x3cd9f0e200000000, 0x02b2320d00000000, + 0x7ba9eeef00000000, 0x45c22c0000000000, 0x46791beb00000000, + 0x7812d90400000000, 0xf549d2f500000000, 0xcb22101a00000000, + 0xc89927f100000000, 0xf6f2e51e00000000, 0x8fe939fc00000000, + 0xb182fb1300000000, 0xb239ccf800000000, 0x8c520e1700000000, + 0xe988abc100000000, 0xd7e3692e00000000, 0xd4585ec500000000, + 0xea339c2a00000000, 0x932840c800000000, 0xad43822700000000, + 0xaef8b5cc00000000, 0x9093772300000000, 0x1dc87cd200000000, + 0x23a3be3d00000000, 0x201889d600000000, 0x1e734b3900000000, + 0x676897db00000000, 0x5903553400000000, 0x5ab862df00000000, + 0x64d3a03000000000, 0xd10a58a900000000, 0xef619a4600000000, + 0xecdaadad00000000, 0xd2b16f4200000000, 0xabaab3a000000000, + 0x95c1714f00000000, 0x967a46a400000000, 0xa811844b00000000, + 0x254a8fba00000000, 0x1b214d5500000000, 0x189a7abe00000000, + 0x26f1b85100000000, 0x5fea64b300000000, 0x6181a65c00000000, + 0x623a91b700000000, 0x5c51535800000000, 0x398bf68e00000000, + 0x07e0346100000000, 0x045b038a00000000, 0x3a30c16500000000, + 0x432b1d8700000000, 0x7d40df6800000000, 0x7efbe88300000000, + 0x40902a6c00000000, 0xcdcb219d00000000, 0xf3a0e37200000000, + 0xf01bd49900000000, 0xce70167600000000, 0xb76bca9400000000, + 0x8900087b00000000, 0x8abb3f9000000000, 
0xb4d0fd7f00000000, + 0xa10ebf7800000000, 0x9f657d9700000000, 0x9cde4a7c00000000, + 0xa2b5889300000000, 0xdbae547100000000, 0xe5c5969e00000000, + 0xe67ea17500000000, 0xd815639a00000000, 0x554e686b00000000, + 0x6b25aa8400000000, 0x689e9d6f00000000, 0x56f55f8000000000, + 0x2fee836200000000, 0x1185418d00000000, 0x123e766600000000, + 0x2c55b48900000000, 0x498f115f00000000, 0x77e4d3b000000000, + 0x745fe45b00000000, 0x4a3426b400000000, 0x332ffa5600000000, + 0x0d4438b900000000, 0x0eff0f5200000000, 0x3094cdbd00000000, + 0xbdcfc64c00000000, 0x83a404a300000000, 0x801f334800000000, + 0xbe74f1a700000000, 0xc76f2d4500000000, 0xf904efaa00000000, + 0xfabfd84100000000, 0xc4d41aae00000000, 0x710de23700000000, + 0x4f6620d800000000, 0x4cdd173300000000, 0x72b6d5dc00000000, + 0x0bad093e00000000, 0x35c6cbd100000000, 0x367dfc3a00000000, + 0x08163ed500000000, 0x854d352400000000, 0xbb26f7cb00000000, + 0xb89dc02000000000, 0x86f602cf00000000, 0xffedde2d00000000, + 0xc1861cc200000000, 0xc23d2b2900000000, 0xfc56e9c600000000, + 0x998c4c1000000000, 0xa7e78eff00000000, 0xa45cb91400000000, + 0x9a377bfb00000000, 0xe32ca71900000000, 0xdd4765f600000000, + 0xdefc521d00000000, 0xe09790f200000000, 0x6dcc9b0300000000, + 0x53a759ec00000000, 0x501c6e0700000000, 0x6e77ace800000000, + 0x176c700a00000000, 0x2907b2e500000000, 0x2abc850e00000000, + 0x14d747e100000000}, + {0x0000000000000000, 0xc0df8ec100000000, 0xc1b96c5800000000, + 0x0166e29900000000, 0x8273d9b000000000, 0x42ac577100000000, + 0x43cab5e800000000, 0x83153b2900000000, 0x45e1c3ba00000000, + 0x853e4d7b00000000, 0x8458afe200000000, 0x4487212300000000, + 0xc7921a0a00000000, 0x074d94cb00000000, 0x062b765200000000, + 0xc6f4f89300000000, 0xcbc4f6ae00000000, 0x0b1b786f00000000, + 0x0a7d9af600000000, 0xcaa2143700000000, 0x49b72f1e00000000, + 0x8968a1df00000000, 0x880e434600000000, 0x48d1cd8700000000, + 0x8e25351400000000, 0x4efabbd500000000, 0x4f9c594c00000000, + 0x8f43d78d00000000, 0x0c56eca400000000, 0xcc89626500000000, + 0xcdef80fc00000000, 0x0d300e3d00000000, 0xd78f9c8600000000, + 0x1750124700000000, 0x1636f0de00000000, 0xd6e97e1f00000000, + 0x55fc453600000000, 0x9523cbf700000000, 0x9445296e00000000, + 0x549aa7af00000000, 0x926e5f3c00000000, 0x52b1d1fd00000000, + 0x53d7336400000000, 0x9308bda500000000, 0x101d868c00000000, + 0xd0c2084d00000000, 0xd1a4ead400000000, 0x117b641500000000, + 0x1c4b6a2800000000, 0xdc94e4e900000000, 0xddf2067000000000, + 0x1d2d88b100000000, 0x9e38b39800000000, 0x5ee73d5900000000, + 0x5f81dfc000000000, 0x9f5e510100000000, 0x59aaa99200000000, + 0x9975275300000000, 0x9813c5ca00000000, 0x58cc4b0b00000000, + 0xdbd9702200000000, 0x1b06fee300000000, 0x1a601c7a00000000, + 0xdabf92bb00000000, 0xef1948d600000000, 0x2fc6c61700000000, + 0x2ea0248e00000000, 0xee7faa4f00000000, 0x6d6a916600000000, + 0xadb51fa700000000, 0xacd3fd3e00000000, 0x6c0c73ff00000000, + 0xaaf88b6c00000000, 0x6a2705ad00000000, 0x6b41e73400000000, + 0xab9e69f500000000, 0x288b52dc00000000, 0xe854dc1d00000000, + 0xe9323e8400000000, 0x29edb04500000000, 0x24ddbe7800000000, + 0xe40230b900000000, 0xe564d22000000000, 0x25bb5ce100000000, + 0xa6ae67c800000000, 0x6671e90900000000, 0x67170b9000000000, + 0xa7c8855100000000, 0x613c7dc200000000, 0xa1e3f30300000000, + 0xa085119a00000000, 0x605a9f5b00000000, 0xe34fa47200000000, + 0x23902ab300000000, 0x22f6c82a00000000, 0xe22946eb00000000, + 0x3896d45000000000, 0xf8495a9100000000, 0xf92fb80800000000, + 0x39f036c900000000, 0xbae50de000000000, 0x7a3a832100000000, + 0x7b5c61b800000000, 0xbb83ef7900000000, 0x7d7717ea00000000, + 0xbda8992b00000000, 
0xbcce7bb200000000, 0x7c11f57300000000, + 0xff04ce5a00000000, 0x3fdb409b00000000, 0x3ebda20200000000, + 0xfe622cc300000000, 0xf35222fe00000000, 0x338dac3f00000000, + 0x32eb4ea600000000, 0xf234c06700000000, 0x7121fb4e00000000, + 0xb1fe758f00000000, 0xb098971600000000, 0x704719d700000000, + 0xb6b3e14400000000, 0x766c6f8500000000, 0x770a8d1c00000000, + 0xb7d503dd00000000, 0x34c038f400000000, 0xf41fb63500000000, + 0xf57954ac00000000, 0x35a6da6d00000000, 0x9f35e17700000000, + 0x5fea6fb600000000, 0x5e8c8d2f00000000, 0x9e5303ee00000000, + 0x1d4638c700000000, 0xdd99b60600000000, 0xdcff549f00000000, + 0x1c20da5e00000000, 0xdad422cd00000000, 0x1a0bac0c00000000, + 0x1b6d4e9500000000, 0xdbb2c05400000000, 0x58a7fb7d00000000, + 0x987875bc00000000, 0x991e972500000000, 0x59c119e400000000, + 0x54f117d900000000, 0x942e991800000000, 0x95487b8100000000, + 0x5597f54000000000, 0xd682ce6900000000, 0x165d40a800000000, + 0x173ba23100000000, 0xd7e42cf000000000, 0x1110d46300000000, + 0xd1cf5aa200000000, 0xd0a9b83b00000000, 0x107636fa00000000, + 0x93630dd300000000, 0x53bc831200000000, 0x52da618b00000000, + 0x9205ef4a00000000, 0x48ba7df100000000, 0x8865f33000000000, + 0x890311a900000000, 0x49dc9f6800000000, 0xcac9a44100000000, + 0x0a162a8000000000, 0x0b70c81900000000, 0xcbaf46d800000000, + 0x0d5bbe4b00000000, 0xcd84308a00000000, 0xcce2d21300000000, + 0x0c3d5cd200000000, 0x8f2867fb00000000, 0x4ff7e93a00000000, + 0x4e910ba300000000, 0x8e4e856200000000, 0x837e8b5f00000000, + 0x43a1059e00000000, 0x42c7e70700000000, 0x821869c600000000, + 0x010d52ef00000000, 0xc1d2dc2e00000000, 0xc0b43eb700000000, + 0x006bb07600000000, 0xc69f48e500000000, 0x0640c62400000000, + 0x072624bd00000000, 0xc7f9aa7c00000000, 0x44ec915500000000, + 0x84331f9400000000, 0x8555fd0d00000000, 0x458a73cc00000000, + 0x702ca9a100000000, 0xb0f3276000000000, 0xb195c5f900000000, + 0x714a4b3800000000, 0xf25f701100000000, 0x3280fed000000000, + 0x33e61c4900000000, 0xf339928800000000, 0x35cd6a1b00000000, + 0xf512e4da00000000, 0xf474064300000000, 0x34ab888200000000, + 0xb7beb3ab00000000, 0x77613d6a00000000, 0x7607dff300000000, + 0xb6d8513200000000, 0xbbe85f0f00000000, 0x7b37d1ce00000000, + 0x7a51335700000000, 0xba8ebd9600000000, 0x399b86bf00000000, + 0xf944087e00000000, 0xf822eae700000000, 0x38fd642600000000, + 0xfe099cb500000000, 0x3ed6127400000000, 0x3fb0f0ed00000000, + 0xff6f7e2c00000000, 0x7c7a450500000000, 0xbca5cbc400000000, + 0xbdc3295d00000000, 0x7d1ca79c00000000, 0xa7a3352700000000, + 0x677cbbe600000000, 0x661a597f00000000, 0xa6c5d7be00000000, + 0x25d0ec9700000000, 0xe50f625600000000, 0xe46980cf00000000, + 0x24b60e0e00000000, 0xe242f69d00000000, 0x229d785c00000000, + 0x23fb9ac500000000, 0xe324140400000000, 0x60312f2d00000000, + 0xa0eea1ec00000000, 0xa188437500000000, 0x6157cdb400000000, + 0x6c67c38900000000, 0xacb84d4800000000, 0xaddeafd100000000, + 0x6d01211000000000, 0xee141a3900000000, 0x2ecb94f800000000, + 0x2fad766100000000, 0xef72f8a000000000, 0x2986003300000000, + 0xe9598ef200000000, 0xe83f6c6b00000000, 0x28e0e2aa00000000, + 0xabf5d98300000000, 0x6b2a574200000000, 0x6a4cb5db00000000, + 0xaa933b1a00000000}, + {0x0000000000000000, 0x6f4ca59b00000000, 0x9f9e3bec00000000, + 0xf0d29e7700000000, 0x7f3b060300000000, 0x1077a39800000000, + 0xe0a53def00000000, 0x8fe9987400000000, 0xfe760c0600000000, + 0x913aa99d00000000, 0x61e837ea00000000, 0x0ea4927100000000, + 0x814d0a0500000000, 0xee01af9e00000000, 0x1ed331e900000000, + 0x719f947200000000, 0xfced180c00000000, 0x93a1bd9700000000, + 0x637323e000000000, 0x0c3f867b00000000, 0x83d61e0f00000000, + 
0xec9abb9400000000, 0x1c4825e300000000, 0x7304807800000000, + 0x029b140a00000000, 0x6dd7b19100000000, 0x9d052fe600000000, + 0xf2498a7d00000000, 0x7da0120900000000, 0x12ecb79200000000, + 0xe23e29e500000000, 0x8d728c7e00000000, 0xf8db311800000000, + 0x9797948300000000, 0x67450af400000000, 0x0809af6f00000000, + 0x87e0371b00000000, 0xe8ac928000000000, 0x187e0cf700000000, + 0x7732a96c00000000, 0x06ad3d1e00000000, 0x69e1988500000000, + 0x993306f200000000, 0xf67fa36900000000, 0x79963b1d00000000, + 0x16da9e8600000000, 0xe60800f100000000, 0x8944a56a00000000, + 0x0436291400000000, 0x6b7a8c8f00000000, 0x9ba812f800000000, + 0xf4e4b76300000000, 0x7b0d2f1700000000, 0x14418a8c00000000, + 0xe49314fb00000000, 0x8bdfb16000000000, 0xfa40251200000000, + 0x950c808900000000, 0x65de1efe00000000, 0x0a92bb6500000000, + 0x857b231100000000, 0xea37868a00000000, 0x1ae518fd00000000, + 0x75a9bd6600000000, 0xf0b7633000000000, 0x9ffbc6ab00000000, + 0x6f2958dc00000000, 0x0065fd4700000000, 0x8f8c653300000000, + 0xe0c0c0a800000000, 0x10125edf00000000, 0x7f5efb4400000000, + 0x0ec16f3600000000, 0x618dcaad00000000, 0x915f54da00000000, + 0xfe13f14100000000, 0x71fa693500000000, 0x1eb6ccae00000000, + 0xee6452d900000000, 0x8128f74200000000, 0x0c5a7b3c00000000, + 0x6316dea700000000, 0x93c440d000000000, 0xfc88e54b00000000, + 0x73617d3f00000000, 0x1c2dd8a400000000, 0xecff46d300000000, + 0x83b3e34800000000, 0xf22c773a00000000, 0x9d60d2a100000000, + 0x6db24cd600000000, 0x02fee94d00000000, 0x8d17713900000000, + 0xe25bd4a200000000, 0x12894ad500000000, 0x7dc5ef4e00000000, + 0x086c522800000000, 0x6720f7b300000000, 0x97f269c400000000, + 0xf8becc5f00000000, 0x7757542b00000000, 0x181bf1b000000000, + 0xe8c96fc700000000, 0x8785ca5c00000000, 0xf61a5e2e00000000, + 0x9956fbb500000000, 0x698465c200000000, 0x06c8c05900000000, + 0x8921582d00000000, 0xe66dfdb600000000, 0x16bf63c100000000, + 0x79f3c65a00000000, 0xf4814a2400000000, 0x9bcdefbf00000000, + 0x6b1f71c800000000, 0x0453d45300000000, 0x8bba4c2700000000, + 0xe4f6e9bc00000000, 0x142477cb00000000, 0x7b68d25000000000, + 0x0af7462200000000, 0x65bbe3b900000000, 0x95697dce00000000, + 0xfa25d85500000000, 0x75cc402100000000, 0x1a80e5ba00000000, + 0xea527bcd00000000, 0x851ede5600000000, 0xe06fc76000000000, + 0x8f2362fb00000000, 0x7ff1fc8c00000000, 0x10bd591700000000, + 0x9f54c16300000000, 0xf01864f800000000, 0x00cafa8f00000000, + 0x6f865f1400000000, 0x1e19cb6600000000, 0x71556efd00000000, + 0x8187f08a00000000, 0xeecb551100000000, 0x6122cd6500000000, + 0x0e6e68fe00000000, 0xfebcf68900000000, 0x91f0531200000000, + 0x1c82df6c00000000, 0x73ce7af700000000, 0x831ce48000000000, + 0xec50411b00000000, 0x63b9d96f00000000, 0x0cf57cf400000000, + 0xfc27e28300000000, 0x936b471800000000, 0xe2f4d36a00000000, + 0x8db876f100000000, 0x7d6ae88600000000, 0x12264d1d00000000, + 0x9dcfd56900000000, 0xf28370f200000000, 0x0251ee8500000000, + 0x6d1d4b1e00000000, 0x18b4f67800000000, 0x77f853e300000000, + 0x872acd9400000000, 0xe866680f00000000, 0x678ff07b00000000, + 0x08c355e000000000, 0xf811cb9700000000, 0x975d6e0c00000000, + 0xe6c2fa7e00000000, 0x898e5fe500000000, 0x795cc19200000000, + 0x1610640900000000, 0x99f9fc7d00000000, 0xf6b559e600000000, + 0x0667c79100000000, 0x692b620a00000000, 0xe459ee7400000000, + 0x8b154bef00000000, 0x7bc7d59800000000, 0x148b700300000000, + 0x9b62e87700000000, 0xf42e4dec00000000, 0x04fcd39b00000000, + 0x6bb0760000000000, 0x1a2fe27200000000, 0x756347e900000000, + 0x85b1d99e00000000, 0xeafd7c0500000000, 0x6514e47100000000, + 0x0a5841ea00000000, 0xfa8adf9d00000000, 0x95c67a0600000000, + 0x10d8a45000000000, 
0x7f9401cb00000000, 0x8f469fbc00000000, + 0xe00a3a2700000000, 0x6fe3a25300000000, 0x00af07c800000000, + 0xf07d99bf00000000, 0x9f313c2400000000, 0xeeaea85600000000, + 0x81e20dcd00000000, 0x713093ba00000000, 0x1e7c362100000000, + 0x9195ae5500000000, 0xfed90bce00000000, 0x0e0b95b900000000, + 0x6147302200000000, 0xec35bc5c00000000, 0x837919c700000000, + 0x73ab87b000000000, 0x1ce7222b00000000, 0x930eba5f00000000, + 0xfc421fc400000000, 0x0c9081b300000000, 0x63dc242800000000, + 0x1243b05a00000000, 0x7d0f15c100000000, 0x8ddd8bb600000000, + 0xe2912e2d00000000, 0x6d78b65900000000, 0x023413c200000000, + 0xf2e68db500000000, 0x9daa282e00000000, 0xe803954800000000, + 0x874f30d300000000, 0x779daea400000000, 0x18d10b3f00000000, + 0x9738934b00000000, 0xf87436d000000000, 0x08a6a8a700000000, + 0x67ea0d3c00000000, 0x1675994e00000000, 0x79393cd500000000, + 0x89eba2a200000000, 0xe6a7073900000000, 0x694e9f4d00000000, + 0x06023ad600000000, 0xf6d0a4a100000000, 0x999c013a00000000, + 0x14ee8d4400000000, 0x7ba228df00000000, 0x8b70b6a800000000, + 0xe43c133300000000, 0x6bd58b4700000000, 0x04992edc00000000, + 0xf44bb0ab00000000, 0x9b07153000000000, 0xea98814200000000, + 0x85d424d900000000, 0x7506baae00000000, 0x1a4a1f3500000000, + 0x95a3874100000000, 0xfaef22da00000000, 0x0a3dbcad00000000, + 0x6571193600000000}, + {0x0000000000000000, 0x85d996dd00000000, 0x4bb55c6000000000, + 0xce6ccabd00000000, 0x966ab9c000000000, 0x13b32f1d00000000, + 0xdddfe5a000000000, 0x5806737d00000000, 0x6dd3035a00000000, + 0xe80a958700000000, 0x26665f3a00000000, 0xa3bfc9e700000000, + 0xfbb9ba9a00000000, 0x7e602c4700000000, 0xb00ce6fa00000000, + 0x35d5702700000000, 0xdaa607b400000000, 0x5f7f916900000000, + 0x91135bd400000000, 0x14cacd0900000000, 0x4cccbe7400000000, + 0xc91528a900000000, 0x0779e21400000000, 0x82a074c900000000, + 0xb77504ee00000000, 0x32ac923300000000, 0xfcc0588e00000000, + 0x7919ce5300000000, 0x211fbd2e00000000, 0xa4c62bf300000000, + 0x6aaae14e00000000, 0xef73779300000000, 0xf54b7eb300000000, + 0x7092e86e00000000, 0xbefe22d300000000, 0x3b27b40e00000000, + 0x6321c77300000000, 0xe6f851ae00000000, 0x28949b1300000000, + 0xad4d0dce00000000, 0x98987de900000000, 0x1d41eb3400000000, + 0xd32d218900000000, 0x56f4b75400000000, 0x0ef2c42900000000, + 0x8b2b52f400000000, 0x4547984900000000, 0xc09e0e9400000000, + 0x2fed790700000000, 0xaa34efda00000000, 0x6458256700000000, + 0xe181b3ba00000000, 0xb987c0c700000000, 0x3c5e561a00000000, + 0xf2329ca700000000, 0x77eb0a7a00000000, 0x423e7a5d00000000, + 0xc7e7ec8000000000, 0x098b263d00000000, 0x8c52b0e000000000, + 0xd454c39d00000000, 0x518d554000000000, 0x9fe19ffd00000000, + 0x1a38092000000000, 0xab918dbd00000000, 0x2e481b6000000000, + 0xe024d1dd00000000, 0x65fd470000000000, 0x3dfb347d00000000, + 0xb822a2a000000000, 0x764e681d00000000, 0xf397fec000000000, + 0xc6428ee700000000, 0x439b183a00000000, 0x8df7d28700000000, + 0x082e445a00000000, 0x5028372700000000, 0xd5f1a1fa00000000, + 0x1b9d6b4700000000, 0x9e44fd9a00000000, 0x71378a0900000000, + 0xf4ee1cd400000000, 0x3a82d66900000000, 0xbf5b40b400000000, + 0xe75d33c900000000, 0x6284a51400000000, 0xace86fa900000000, + 0x2931f97400000000, 0x1ce4895300000000, 0x993d1f8e00000000, + 0x5751d53300000000, 0xd28843ee00000000, 0x8a8e309300000000, + 0x0f57a64e00000000, 0xc13b6cf300000000, 0x44e2fa2e00000000, + 0x5edaf30e00000000, 0xdb0365d300000000, 0x156faf6e00000000, + 0x90b639b300000000, 0xc8b04ace00000000, 0x4d69dc1300000000, + 0x830516ae00000000, 0x06dc807300000000, 0x3309f05400000000, + 0xb6d0668900000000, 0x78bcac3400000000, 0xfd653ae900000000, + 
0xa563499400000000, 0x20badf4900000000, 0xeed615f400000000, + 0x6b0f832900000000, 0x847cf4ba00000000, 0x01a5626700000000, + 0xcfc9a8da00000000, 0x4a103e0700000000, 0x12164d7a00000000, + 0x97cfdba700000000, 0x59a3111a00000000, 0xdc7a87c700000000, + 0xe9aff7e000000000, 0x6c76613d00000000, 0xa21aab8000000000, + 0x27c33d5d00000000, 0x7fc54e2000000000, 0xfa1cd8fd00000000, + 0x3470124000000000, 0xb1a9849d00000000, 0x17256aa000000000, + 0x92fcfc7d00000000, 0x5c9036c000000000, 0xd949a01d00000000, + 0x814fd36000000000, 0x049645bd00000000, 0xcafa8f0000000000, + 0x4f2319dd00000000, 0x7af669fa00000000, 0xff2fff2700000000, + 0x3143359a00000000, 0xb49aa34700000000, 0xec9cd03a00000000, + 0x694546e700000000, 0xa7298c5a00000000, 0x22f01a8700000000, + 0xcd836d1400000000, 0x485afbc900000000, 0x8636317400000000, + 0x03efa7a900000000, 0x5be9d4d400000000, 0xde30420900000000, + 0x105c88b400000000, 0x95851e6900000000, 0xa0506e4e00000000, + 0x2589f89300000000, 0xebe5322e00000000, 0x6e3ca4f300000000, + 0x363ad78e00000000, 0xb3e3415300000000, 0x7d8f8bee00000000, + 0xf8561d3300000000, 0xe26e141300000000, 0x67b782ce00000000, + 0xa9db487300000000, 0x2c02deae00000000, 0x7404add300000000, + 0xf1dd3b0e00000000, 0x3fb1f1b300000000, 0xba68676e00000000, + 0x8fbd174900000000, 0x0a64819400000000, 0xc4084b2900000000, + 0x41d1ddf400000000, 0x19d7ae8900000000, 0x9c0e385400000000, + 0x5262f2e900000000, 0xd7bb643400000000, 0x38c813a700000000, + 0xbd11857a00000000, 0x737d4fc700000000, 0xf6a4d91a00000000, + 0xaea2aa6700000000, 0x2b7b3cba00000000, 0xe517f60700000000, + 0x60ce60da00000000, 0x551b10fd00000000, 0xd0c2862000000000, + 0x1eae4c9d00000000, 0x9b77da4000000000, 0xc371a93d00000000, + 0x46a83fe000000000, 0x88c4f55d00000000, 0x0d1d638000000000, + 0xbcb4e71d00000000, 0x396d71c000000000, 0xf701bb7d00000000, + 0x72d82da000000000, 0x2ade5edd00000000, 0xaf07c80000000000, + 0x616b02bd00000000, 0xe4b2946000000000, 0xd167e44700000000, + 0x54be729a00000000, 0x9ad2b82700000000, 0x1f0b2efa00000000, + 0x470d5d8700000000, 0xc2d4cb5a00000000, 0x0cb801e700000000, + 0x8961973a00000000, 0x6612e0a900000000, 0xe3cb767400000000, + 0x2da7bcc900000000, 0xa87e2a1400000000, 0xf078596900000000, + 0x75a1cfb400000000, 0xbbcd050900000000, 0x3e1493d400000000, + 0x0bc1e3f300000000, 0x8e18752e00000000, 0x4074bf9300000000, + 0xc5ad294e00000000, 0x9dab5a3300000000, 0x1872ccee00000000, + 0xd61e065300000000, 0x53c7908e00000000, 0x49ff99ae00000000, + 0xcc260f7300000000, 0x024ac5ce00000000, 0x8793531300000000, + 0xdf95206e00000000, 0x5a4cb6b300000000, 0x94207c0e00000000, + 0x11f9ead300000000, 0x242c9af400000000, 0xa1f50c2900000000, + 0x6f99c69400000000, 0xea40504900000000, 0xb246233400000000, + 0x379fb5e900000000, 0xf9f37f5400000000, 0x7c2ae98900000000, + 0x93599e1a00000000, 0x168008c700000000, 0xd8ecc27a00000000, + 0x5d3554a700000000, 0x053327da00000000, 0x80eab10700000000, + 0x4e867bba00000000, 0xcb5fed6700000000, 0xfe8a9d4000000000, + 0x7b530b9d00000000, 0xb53fc12000000000, 0x30e657fd00000000, + 0x68e0248000000000, 0xed39b25d00000000, 0x235578e000000000, + 0xa68cee3d00000000}, + {0x0000000000000000, 0x76e10f9d00000000, 0xadc46ee100000000, + 0xdb25617c00000000, 0x1b8fac1900000000, 0x6d6ea38400000000, + 0xb64bc2f800000000, 0xc0aacd6500000000, 0x361e593300000000, + 0x40ff56ae00000000, 0x9bda37d200000000, 0xed3b384f00000000, + 0x2d91f52a00000000, 0x5b70fab700000000, 0x80559bcb00000000, + 0xf6b4945600000000, 0x6c3cb26600000000, 0x1addbdfb00000000, + 0xc1f8dc8700000000, 0xb719d31a00000000, 0x77b31e7f00000000, + 0x015211e200000000, 0xda77709e00000000, 
0xac967f0300000000, + 0x5a22eb5500000000, 0x2cc3e4c800000000, 0xf7e685b400000000, + 0x81078a2900000000, 0x41ad474c00000000, 0x374c48d100000000, + 0xec6929ad00000000, 0x9a88263000000000, 0xd87864cd00000000, + 0xae996b5000000000, 0x75bc0a2c00000000, 0x035d05b100000000, + 0xc3f7c8d400000000, 0xb516c74900000000, 0x6e33a63500000000, + 0x18d2a9a800000000, 0xee663dfe00000000, 0x9887326300000000, + 0x43a2531f00000000, 0x35435c8200000000, 0xf5e991e700000000, + 0x83089e7a00000000, 0x582dff0600000000, 0x2eccf09b00000000, + 0xb444d6ab00000000, 0xc2a5d93600000000, 0x1980b84a00000000, + 0x6f61b7d700000000, 0xafcb7ab200000000, 0xd92a752f00000000, + 0x020f145300000000, 0x74ee1bce00000000, 0x825a8f9800000000, + 0xf4bb800500000000, 0x2f9ee17900000000, 0x597feee400000000, + 0x99d5238100000000, 0xef342c1c00000000, 0x34114d6000000000, + 0x42f042fd00000000, 0xf1f7b94100000000, 0x8716b6dc00000000, + 0x5c33d7a000000000, 0x2ad2d83d00000000, 0xea78155800000000, + 0x9c991ac500000000, 0x47bc7bb900000000, 0x315d742400000000, + 0xc7e9e07200000000, 0xb108efef00000000, 0x6a2d8e9300000000, + 0x1ccc810e00000000, 0xdc664c6b00000000, 0xaa8743f600000000, + 0x71a2228a00000000, 0x07432d1700000000, 0x9dcb0b2700000000, + 0xeb2a04ba00000000, 0x300f65c600000000, 0x46ee6a5b00000000, + 0x8644a73e00000000, 0xf0a5a8a300000000, 0x2b80c9df00000000, + 0x5d61c64200000000, 0xabd5521400000000, 0xdd345d8900000000, + 0x06113cf500000000, 0x70f0336800000000, 0xb05afe0d00000000, + 0xc6bbf19000000000, 0x1d9e90ec00000000, 0x6b7f9f7100000000, + 0x298fdd8c00000000, 0x5f6ed21100000000, 0x844bb36d00000000, + 0xf2aabcf000000000, 0x3200719500000000, 0x44e17e0800000000, + 0x9fc41f7400000000, 0xe92510e900000000, 0x1f9184bf00000000, + 0x69708b2200000000, 0xb255ea5e00000000, 0xc4b4e5c300000000, + 0x041e28a600000000, 0x72ff273b00000000, 0xa9da464700000000, + 0xdf3b49da00000000, 0x45b36fea00000000, 0x3352607700000000, + 0xe877010b00000000, 0x9e960e9600000000, 0x5e3cc3f300000000, + 0x28ddcc6e00000000, 0xf3f8ad1200000000, 0x8519a28f00000000, + 0x73ad36d900000000, 0x054c394400000000, 0xde69583800000000, + 0xa88857a500000000, 0x68229ac000000000, 0x1ec3955d00000000, + 0xc5e6f42100000000, 0xb307fbbc00000000, 0xe2ef738300000000, + 0x940e7c1e00000000, 0x4f2b1d6200000000, 0x39ca12ff00000000, + 0xf960df9a00000000, 0x8f81d00700000000, 0x54a4b17b00000000, + 0x2245bee600000000, 0xd4f12ab000000000, 0xa210252d00000000, + 0x7935445100000000, 0x0fd44bcc00000000, 0xcf7e86a900000000, + 0xb99f893400000000, 0x62bae84800000000, 0x145be7d500000000, + 0x8ed3c1e500000000, 0xf832ce7800000000, 0x2317af0400000000, + 0x55f6a09900000000, 0x955c6dfc00000000, 0xe3bd626100000000, + 0x3898031d00000000, 0x4e790c8000000000, 0xb8cd98d600000000, + 0xce2c974b00000000, 0x1509f63700000000, 0x63e8f9aa00000000, + 0xa34234cf00000000, 0xd5a33b5200000000, 0x0e865a2e00000000, + 0x786755b300000000, 0x3a97174e00000000, 0x4c7618d300000000, + 0x975379af00000000, 0xe1b2763200000000, 0x2118bb5700000000, + 0x57f9b4ca00000000, 0x8cdcd5b600000000, 0xfa3dda2b00000000, + 0x0c894e7d00000000, 0x7a6841e000000000, 0xa14d209c00000000, + 0xd7ac2f0100000000, 0x1706e26400000000, 0x61e7edf900000000, + 0xbac28c8500000000, 0xcc23831800000000, 0x56aba52800000000, + 0x204aaab500000000, 0xfb6fcbc900000000, 0x8d8ec45400000000, + 0x4d24093100000000, 0x3bc506ac00000000, 0xe0e067d000000000, + 0x9601684d00000000, 0x60b5fc1b00000000, 0x1654f38600000000, + 0xcd7192fa00000000, 0xbb909d6700000000, 0x7b3a500200000000, + 0x0ddb5f9f00000000, 0xd6fe3ee300000000, 0xa01f317e00000000, + 0x1318cac200000000, 0x65f9c55f00000000, 0xbedca42300000000, 
+ 0xc83dabbe00000000, 0x089766db00000000, 0x7e76694600000000, + 0xa553083a00000000, 0xd3b207a700000000, 0x250693f100000000, + 0x53e79c6c00000000, 0x88c2fd1000000000, 0xfe23f28d00000000, + 0x3e893fe800000000, 0x4868307500000000, 0x934d510900000000, + 0xe5ac5e9400000000, 0x7f2478a400000000, 0x09c5773900000000, + 0xd2e0164500000000, 0xa40119d800000000, 0x64abd4bd00000000, + 0x124adb2000000000, 0xc96fba5c00000000, 0xbf8eb5c100000000, + 0x493a219700000000, 0x3fdb2e0a00000000, 0xe4fe4f7600000000, + 0x921f40eb00000000, 0x52b58d8e00000000, 0x2454821300000000, + 0xff71e36f00000000, 0x8990ecf200000000, 0xcb60ae0f00000000, + 0xbd81a19200000000, 0x66a4c0ee00000000, 0x1045cf7300000000, + 0xd0ef021600000000, 0xa60e0d8b00000000, 0x7d2b6cf700000000, + 0x0bca636a00000000, 0xfd7ef73c00000000, 0x8b9ff8a100000000, + 0x50ba99dd00000000, 0x265b964000000000, 0xe6f15b2500000000, + 0x901054b800000000, 0x4b3535c400000000, 0x3dd43a5900000000, + 0xa75c1c6900000000, 0xd1bd13f400000000, 0x0a98728800000000, + 0x7c797d1500000000, 0xbcd3b07000000000, 0xca32bfed00000000, + 0x1117de9100000000, 0x67f6d10c00000000, 0x9142455a00000000, + 0xe7a34ac700000000, 0x3c862bbb00000000, 0x4a67242600000000, + 0x8acde94300000000, 0xfc2ce6de00000000, 0x270987a200000000, + 0x51e8883f00000000}, + {0x0000000000000000, 0xe8dbfbb900000000, 0x91b186a800000000, + 0x796a7d1100000000, 0x63657c8a00000000, 0x8bbe873300000000, + 0xf2d4fa2200000000, 0x1a0f019b00000000, 0x87cc89cf00000000, + 0x6f17727600000000, 0x167d0f6700000000, 0xfea6f4de00000000, + 0xe4a9f54500000000, 0x0c720efc00000000, 0x751873ed00000000, + 0x9dc3885400000000, 0x4f9f624400000000, 0xa74499fd00000000, + 0xde2ee4ec00000000, 0x36f51f5500000000, 0x2cfa1ece00000000, + 0xc421e57700000000, 0xbd4b986600000000, 0x559063df00000000, + 0xc853eb8b00000000, 0x2088103200000000, 0x59e26d2300000000, + 0xb139969a00000000, 0xab36970100000000, 0x43ed6cb800000000, + 0x3a8711a900000000, 0xd25cea1000000000, 0x9e3ec58800000000, + 0x76e53e3100000000, 0x0f8f432000000000, 0xe754b89900000000, + 0xfd5bb90200000000, 0x158042bb00000000, 0x6cea3faa00000000, + 0x8431c41300000000, 0x19f24c4700000000, 0xf129b7fe00000000, + 0x8843caef00000000, 0x6098315600000000, 0x7a9730cd00000000, + 0x924ccb7400000000, 0xeb26b66500000000, 0x03fd4ddc00000000, + 0xd1a1a7cc00000000, 0x397a5c7500000000, 0x4010216400000000, + 0xa8cbdadd00000000, 0xb2c4db4600000000, 0x5a1f20ff00000000, + 0x23755dee00000000, 0xcbaea65700000000, 0x566d2e0300000000, + 0xbeb6d5ba00000000, 0xc7dca8ab00000000, 0x2f07531200000000, + 0x3508528900000000, 0xddd3a93000000000, 0xa4b9d42100000000, + 0x4c622f9800000000, 0x7d7bfbca00000000, 0x95a0007300000000, + 0xecca7d6200000000, 0x041186db00000000, 0x1e1e874000000000, + 0xf6c57cf900000000, 0x8faf01e800000000, 0x6774fa5100000000, + 0xfab7720500000000, 0x126c89bc00000000, 0x6b06f4ad00000000, + 0x83dd0f1400000000, 0x99d20e8f00000000, 0x7109f53600000000, + 0x0863882700000000, 0xe0b8739e00000000, 0x32e4998e00000000, + 0xda3f623700000000, 0xa3551f2600000000, 0x4b8ee49f00000000, + 0x5181e50400000000, 0xb95a1ebd00000000, 0xc03063ac00000000, + 0x28eb981500000000, 0xb528104100000000, 0x5df3ebf800000000, + 0x249996e900000000, 0xcc426d5000000000, 0xd64d6ccb00000000, + 0x3e96977200000000, 0x47fcea6300000000, 0xaf2711da00000000, + 0xe3453e4200000000, 0x0b9ec5fb00000000, 0x72f4b8ea00000000, + 0x9a2f435300000000, 0x802042c800000000, 0x68fbb97100000000, + 0x1191c46000000000, 0xf94a3fd900000000, 0x6489b78d00000000, + 0x8c524c3400000000, 0xf538312500000000, 0x1de3ca9c00000000, + 0x07eccb0700000000, 0xef3730be00000000, 
0x965d4daf00000000, + 0x7e86b61600000000, 0xacda5c0600000000, 0x4401a7bf00000000, + 0x3d6bdaae00000000, 0xd5b0211700000000, 0xcfbf208c00000000, + 0x2764db3500000000, 0x5e0ea62400000000, 0xb6d55d9d00000000, + 0x2b16d5c900000000, 0xc3cd2e7000000000, 0xbaa7536100000000, + 0x527ca8d800000000, 0x4873a94300000000, 0xa0a852fa00000000, + 0xd9c22feb00000000, 0x3119d45200000000, 0xbbf0874e00000000, + 0x532b7cf700000000, 0x2a4101e600000000, 0xc29afa5f00000000, + 0xd895fbc400000000, 0x304e007d00000000, 0x49247d6c00000000, + 0xa1ff86d500000000, 0x3c3c0e8100000000, 0xd4e7f53800000000, + 0xad8d882900000000, 0x4556739000000000, 0x5f59720b00000000, + 0xb78289b200000000, 0xcee8f4a300000000, 0x26330f1a00000000, + 0xf46fe50a00000000, 0x1cb41eb300000000, 0x65de63a200000000, + 0x8d05981b00000000, 0x970a998000000000, 0x7fd1623900000000, + 0x06bb1f2800000000, 0xee60e49100000000, 0x73a36cc500000000, + 0x9b78977c00000000, 0xe212ea6d00000000, 0x0ac911d400000000, + 0x10c6104f00000000, 0xf81debf600000000, 0x817796e700000000, + 0x69ac6d5e00000000, 0x25ce42c600000000, 0xcd15b97f00000000, + 0xb47fc46e00000000, 0x5ca43fd700000000, 0x46ab3e4c00000000, + 0xae70c5f500000000, 0xd71ab8e400000000, 0x3fc1435d00000000, + 0xa202cb0900000000, 0x4ad930b000000000, 0x33b34da100000000, + 0xdb68b61800000000, 0xc167b78300000000, 0x29bc4c3a00000000, + 0x50d6312b00000000, 0xb80dca9200000000, 0x6a51208200000000, + 0x828adb3b00000000, 0xfbe0a62a00000000, 0x133b5d9300000000, + 0x09345c0800000000, 0xe1efa7b100000000, 0x9885daa000000000, + 0x705e211900000000, 0xed9da94d00000000, 0x054652f400000000, + 0x7c2c2fe500000000, 0x94f7d45c00000000, 0x8ef8d5c700000000, + 0x66232e7e00000000, 0x1f49536f00000000, 0xf792a8d600000000, + 0xc68b7c8400000000, 0x2e50873d00000000, 0x573afa2c00000000, + 0xbfe1019500000000, 0xa5ee000e00000000, 0x4d35fbb700000000, + 0x345f86a600000000, 0xdc847d1f00000000, 0x4147f54b00000000, + 0xa99c0ef200000000, 0xd0f673e300000000, 0x382d885a00000000, + 0x222289c100000000, 0xcaf9727800000000, 0xb3930f6900000000, + 0x5b48f4d000000000, 0x89141ec000000000, 0x61cfe57900000000, + 0x18a5986800000000, 0xf07e63d100000000, 0xea71624a00000000, + 0x02aa99f300000000, 0x7bc0e4e200000000, 0x931b1f5b00000000, + 0x0ed8970f00000000, 0xe6036cb600000000, 0x9f6911a700000000, + 0x77b2ea1e00000000, 0x6dbdeb8500000000, 0x8566103c00000000, + 0xfc0c6d2d00000000, 0x14d7969400000000, 0x58b5b90c00000000, + 0xb06e42b500000000, 0xc9043fa400000000, 0x21dfc41d00000000, + 0x3bd0c58600000000, 0xd30b3e3f00000000, 0xaa61432e00000000, + 0x42bab89700000000, 0xdf7930c300000000, 0x37a2cb7a00000000, + 0x4ec8b66b00000000, 0xa6134dd200000000, 0xbc1c4c4900000000, + 0x54c7b7f000000000, 0x2dadcae100000000, 0xc576315800000000, + 0x172adb4800000000, 0xfff120f100000000, 0x869b5de000000000, + 0x6e40a65900000000, 0x744fa7c200000000, 0x9c945c7b00000000, + 0xe5fe216a00000000, 0x0d25dad300000000, 0x90e6528700000000, + 0x783da93e00000000, 0x0157d42f00000000, 0xe98c2f9600000000, + 0xf3832e0d00000000, 0x1b58d5b400000000, 0x6232a8a500000000, + 0x8ae9531c00000000}, + {0x0000000000000000, 0x919168ae00000000, 0x6325a08700000000, + 0xf2b4c82900000000, 0x874c31d400000000, 0x16dd597a00000000, + 0xe469915300000000, 0x75f8f9fd00000000, 0x4f9f137300000000, + 0xde0e7bdd00000000, 0x2cbab3f400000000, 0xbd2bdb5a00000000, + 0xc8d322a700000000, 0x59424a0900000000, 0xabf6822000000000, + 0x3a67ea8e00000000, 0x9e3e27e600000000, 0x0faf4f4800000000, + 0xfd1b876100000000, 0x6c8aefcf00000000, 0x1972163200000000, + 0x88e37e9c00000000, 0x7a57b6b500000000, 0xebc6de1b00000000, + 0xd1a1349500000000, 
0x40305c3b00000000, 0xb284941200000000, + 0x2315fcbc00000000, 0x56ed054100000000, 0xc77c6def00000000, + 0x35c8a5c600000000, 0xa459cd6800000000, 0x7d7b3f1700000000, + 0xecea57b900000000, 0x1e5e9f9000000000, 0x8fcff73e00000000, + 0xfa370ec300000000, 0x6ba6666d00000000, 0x9912ae4400000000, + 0x0883c6ea00000000, 0x32e42c6400000000, 0xa37544ca00000000, + 0x51c18ce300000000, 0xc050e44d00000000, 0xb5a81db000000000, + 0x2439751e00000000, 0xd68dbd3700000000, 0x471cd59900000000, + 0xe34518f100000000, 0x72d4705f00000000, 0x8060b87600000000, + 0x11f1d0d800000000, 0x6409292500000000, 0xf598418b00000000, + 0x072c89a200000000, 0x96bde10c00000000, 0xacda0b8200000000, + 0x3d4b632c00000000, 0xcfffab0500000000, 0x5e6ec3ab00000000, + 0x2b963a5600000000, 0xba0752f800000000, 0x48b39ad100000000, + 0xd922f27f00000000, 0xfaf67e2e00000000, 0x6b67168000000000, + 0x99d3dea900000000, 0x0842b60700000000, 0x7dba4ffa00000000, + 0xec2b275400000000, 0x1e9fef7d00000000, 0x8f0e87d300000000, + 0xb5696d5d00000000, 0x24f805f300000000, 0xd64ccdda00000000, + 0x47dda57400000000, 0x32255c8900000000, 0xa3b4342700000000, + 0x5100fc0e00000000, 0xc09194a000000000, 0x64c859c800000000, + 0xf559316600000000, 0x07edf94f00000000, 0x967c91e100000000, + 0xe384681c00000000, 0x721500b200000000, 0x80a1c89b00000000, + 0x1130a03500000000, 0x2b574abb00000000, 0xbac6221500000000, + 0x4872ea3c00000000, 0xd9e3829200000000, 0xac1b7b6f00000000, + 0x3d8a13c100000000, 0xcf3edbe800000000, 0x5eafb34600000000, + 0x878d413900000000, 0x161c299700000000, 0xe4a8e1be00000000, + 0x7539891000000000, 0x00c170ed00000000, 0x9150184300000000, + 0x63e4d06a00000000, 0xf275b8c400000000, 0xc812524a00000000, + 0x59833ae400000000, 0xab37f2cd00000000, 0x3aa69a6300000000, + 0x4f5e639e00000000, 0xdecf0b3000000000, 0x2c7bc31900000000, + 0xbdeaabb700000000, 0x19b366df00000000, 0x88220e7100000000, + 0x7a96c65800000000, 0xeb07aef600000000, 0x9eff570b00000000, + 0x0f6e3fa500000000, 0xfddaf78c00000000, 0x6c4b9f2200000000, + 0x562c75ac00000000, 0xc7bd1d0200000000, 0x3509d52b00000000, + 0xa498bd8500000000, 0xd160447800000000, 0x40f12cd600000000, + 0xb245e4ff00000000, 0x23d48c5100000000, 0xf4edfd5c00000000, + 0x657c95f200000000, 0x97c85ddb00000000, 0x0659357500000000, + 0x73a1cc8800000000, 0xe230a42600000000, 0x10846c0f00000000, + 0x811504a100000000, 0xbb72ee2f00000000, 0x2ae3868100000000, + 0xd8574ea800000000, 0x49c6260600000000, 0x3c3edffb00000000, + 0xadafb75500000000, 0x5f1b7f7c00000000, 0xce8a17d200000000, + 0x6ad3daba00000000, 0xfb42b21400000000, 0x09f67a3d00000000, + 0x9867129300000000, 0xed9feb6e00000000, 0x7c0e83c000000000, + 0x8eba4be900000000, 0x1f2b234700000000, 0x254cc9c900000000, + 0xb4dda16700000000, 0x4669694e00000000, 0xd7f801e000000000, + 0xa200f81d00000000, 0x339190b300000000, 0xc125589a00000000, + 0x50b4303400000000, 0x8996c24b00000000, 0x1807aae500000000, + 0xeab362cc00000000, 0x7b220a6200000000, 0x0edaf39f00000000, + 0x9f4b9b3100000000, 0x6dff531800000000, 0xfc6e3bb600000000, + 0xc609d13800000000, 0x5798b99600000000, 0xa52c71bf00000000, + 0x34bd191100000000, 0x4145e0ec00000000, 0xd0d4884200000000, + 0x2260406b00000000, 0xb3f128c500000000, 0x17a8e5ad00000000, + 0x86398d0300000000, 0x748d452a00000000, 0xe51c2d8400000000, + 0x90e4d47900000000, 0x0175bcd700000000, 0xf3c174fe00000000, + 0x62501c5000000000, 0x5837f6de00000000, 0xc9a69e7000000000, + 0x3b12565900000000, 0xaa833ef700000000, 0xdf7bc70a00000000, + 0x4eeaafa400000000, 0xbc5e678d00000000, 0x2dcf0f2300000000, + 0x0e1b837200000000, 0x9f8aebdc00000000, 0x6d3e23f500000000, + 0xfcaf4b5b00000000, 0x8957b2a600000000, 
0x18c6da0800000000, + 0xea72122100000000, 0x7be37a8f00000000, 0x4184900100000000, + 0xd015f8af00000000, 0x22a1308600000000, 0xb330582800000000, + 0xc6c8a1d500000000, 0x5759c97b00000000, 0xa5ed015200000000, + 0x347c69fc00000000, 0x9025a49400000000, 0x01b4cc3a00000000, + 0xf300041300000000, 0x62916cbd00000000, 0x1769954000000000, + 0x86f8fdee00000000, 0x744c35c700000000, 0xe5dd5d6900000000, + 0xdfbab7e700000000, 0x4e2bdf4900000000, 0xbc9f176000000000, + 0x2d0e7fce00000000, 0x58f6863300000000, 0xc967ee9d00000000, + 0x3bd326b400000000, 0xaa424e1a00000000, 0x7360bc6500000000, + 0xe2f1d4cb00000000, 0x10451ce200000000, 0x81d4744c00000000, + 0xf42c8db100000000, 0x65bde51f00000000, 0x97092d3600000000, + 0x0698459800000000, 0x3cffaf1600000000, 0xad6ec7b800000000, + 0x5fda0f9100000000, 0xce4b673f00000000, 0xbbb39ec200000000, + 0x2a22f66c00000000, 0xd8963e4500000000, 0x490756eb00000000, + 0xed5e9b8300000000, 0x7ccff32d00000000, 0x8e7b3b0400000000, + 0x1fea53aa00000000, 0x6a12aa5700000000, 0xfb83c2f900000000, + 0x09370ad000000000, 0x98a6627e00000000, 0xa2c188f000000000, + 0x3350e05e00000000, 0xc1e4287700000000, 0x507540d900000000, + 0x258db92400000000, 0xb41cd18a00000000, 0x46a819a300000000, + 0xd739710d00000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, + 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, + 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, + 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, + 0xf8f13fd1, 0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, + 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, + 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, + 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, + 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, + 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, + 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, + 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, + 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, + 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, + 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, + 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, + 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, + 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, + 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, + 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, + 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, + 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, + 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, + 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, + 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, + 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, + 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, + 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, + 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, + 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, + 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, + 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, + 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, + 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, + 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, + 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, + 0x52bcd85d, 
0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, + 0xc9b9cd8c, 0x4736ca6f, 0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, + 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, + 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, + 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, + 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, + 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, + 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, + 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, + 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, + 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, + 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, + 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, + 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, + 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, + 0x264b06e6}, + {0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, + 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, + 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, + 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, + 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, + 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, + 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, + 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, + 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, + 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, + 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, + 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, + 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, + 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, + 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, + 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, + 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, + 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, + 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, + 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, + 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, + 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, + 0xdb8937b8, 0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, + 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, + 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, + 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, + 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, + 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, + 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, + 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, + 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, + 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, + 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, + 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, + 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, + 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, + 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, + 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, + 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, + 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, + 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, + 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, + 0x3c5c126c, 
0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, + 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, + 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, + 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, + 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, + 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, + 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, + 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, + 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, + 0x92364a30}, + {0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, + 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, + 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, + 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, + 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, + 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, + 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, + 0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, + 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, + 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, + 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, + 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, + 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, + 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, + 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, + 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, + 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, + 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, + 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, + 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, + 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, + 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, + 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, + 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, + 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, + 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, + 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, + 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, + 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, + 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, + 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, + 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, + 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, + 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, + 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, + 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, + 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, + 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, + 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, + 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, + 0xfdbb9cd9, 0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, + 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, + 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, + 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, + 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, + 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, + 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, + 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, + 0x15921919, 
0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, + 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, + 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, + 0xe4c4abcc}, + {0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, + 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, + 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, + 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, + 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, + 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, + 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, + 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, + 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, + 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, + 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, + 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, + 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, + 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, + 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, + 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, + 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, + 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, + 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, + 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, + 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, + 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, + 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, + 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, + 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, + 0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, + 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, + 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, + 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, + 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, + 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, + 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, + 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, + 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, + 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, + 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, + 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, + 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, + 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, + 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, + 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, + 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, + 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, + 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, + 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, + 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, + 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, + 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, + 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, + 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, + 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, + 0xca64c78c}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0xb029603d, 0x6053c07a, 0xd07aa047, 0xc0a680f5, + 0x708fe0c8, 0xa0f5408f, 0x10dc20b2, 0xc14b7030, 0x7162100d, + 
0xa118b04a, 0x1131d077, 0x01edf0c5, 0xb1c490f8, 0x61be30bf, + 0xd1975082, 0x8297e060, 0x32be805d, 0xe2c4201a, 0x52ed4027, + 0x42316095, 0xf21800a8, 0x2262a0ef, 0x924bc0d2, 0x43dc9050, + 0xf3f5f06d, 0x238f502a, 0x93a63017, 0x837a10a5, 0x33537098, + 0xe329d0df, 0x5300b0e2, 0x042fc1c1, 0xb406a1fc, 0x647c01bb, + 0xd4556186, 0xc4894134, 0x74a02109, 0xa4da814e, 0x14f3e173, + 0xc564b1f1, 0x754dd1cc, 0xa537718b, 0x151e11b6, 0x05c23104, + 0xb5eb5139, 0x6591f17e, 0xd5b89143, 0x86b821a1, 0x3691419c, + 0xe6ebe1db, 0x56c281e6, 0x461ea154, 0xf637c169, 0x264d612e, + 0x96640113, 0x47f35191, 0xf7da31ac, 0x27a091eb, 0x9789f1d6, + 0x8755d164, 0x377cb159, 0xe706111e, 0x572f7123, 0x4958f358, + 0xf9719365, 0x290b3322, 0x9922531f, 0x89fe73ad, 0x39d71390, + 0xe9adb3d7, 0x5984d3ea, 0x88138368, 0x383ae355, 0xe8404312, + 0x5869232f, 0x48b5039d, 0xf89c63a0, 0x28e6c3e7, 0x98cfa3da, + 0xcbcf1338, 0x7be67305, 0xab9cd342, 0x1bb5b37f, 0x0b6993cd, + 0xbb40f3f0, 0x6b3a53b7, 0xdb13338a, 0x0a846308, 0xbaad0335, + 0x6ad7a372, 0xdafec34f, 0xca22e3fd, 0x7a0b83c0, 0xaa712387, + 0x1a5843ba, 0x4d773299, 0xfd5e52a4, 0x2d24f2e3, 0x9d0d92de, + 0x8dd1b26c, 0x3df8d251, 0xed827216, 0x5dab122b, 0x8c3c42a9, + 0x3c152294, 0xec6f82d3, 0x5c46e2ee, 0x4c9ac25c, 0xfcb3a261, + 0x2cc90226, 0x9ce0621b, 0xcfe0d2f9, 0x7fc9b2c4, 0xafb31283, + 0x1f9a72be, 0x0f46520c, 0xbf6f3231, 0x6f159276, 0xdf3cf24b, + 0x0eaba2c9, 0xbe82c2f4, 0x6ef862b3, 0xded1028e, 0xce0d223c, + 0x7e244201, 0xae5ee246, 0x1e77827b, 0x92b0e6b1, 0x2299868c, + 0xf2e326cb, 0x42ca46f6, 0x52166644, 0xe23f0679, 0x3245a63e, + 0x826cc603, 0x53fb9681, 0xe3d2f6bc, 0x33a856fb, 0x838136c6, + 0x935d1674, 0x23747649, 0xf30ed60e, 0x4327b633, 0x102706d1, + 0xa00e66ec, 0x7074c6ab, 0xc05da696, 0xd0818624, 0x60a8e619, + 0xb0d2465e, 0x00fb2663, 0xd16c76e1, 0x614516dc, 0xb13fb69b, + 0x0116d6a6, 0x11caf614, 0xa1e39629, 0x7199366e, 0xc1b05653, + 0x969f2770, 0x26b6474d, 0xf6cce70a, 0x46e58737, 0x5639a785, + 0xe610c7b8, 0x366a67ff, 0x864307c2, 0x57d45740, 0xe7fd377d, + 0x3787973a, 0x87aef707, 0x9772d7b5, 0x275bb788, 0xf72117cf, + 0x470877f2, 0x1408c710, 0xa421a72d, 0x745b076a, 0xc4726757, + 0xd4ae47e5, 0x648727d8, 0xb4fd879f, 0x04d4e7a2, 0xd543b720, + 0x656ad71d, 0xb510775a, 0x05391767, 0x15e537d5, 0xa5cc57e8, + 0x75b6f7af, 0xc59f9792, 0xdbe815e9, 0x6bc175d4, 0xbbbbd593, + 0x0b92b5ae, 0x1b4e951c, 0xab67f521, 0x7b1d5566, 0xcb34355b, + 0x1aa365d9, 0xaa8a05e4, 0x7af0a5a3, 0xcad9c59e, 0xda05e52c, + 0x6a2c8511, 0xba562556, 0x0a7f456b, 0x597ff589, 0xe95695b4, + 0x392c35f3, 0x890555ce, 0x99d9757c, 0x29f01541, 0xf98ab506, + 0x49a3d53b, 0x983485b9, 0x281de584, 0xf86745c3, 0x484e25fe, + 0x5892054c, 0xe8bb6571, 0x38c1c536, 0x88e8a50b, 0xdfc7d428, + 0x6feeb415, 0xbf941452, 0x0fbd746f, 0x1f6154dd, 0xaf4834e0, + 0x7f3294a7, 0xcf1bf49a, 0x1e8ca418, 0xaea5c425, 0x7edf6462, + 0xcef6045f, 0xde2a24ed, 0x6e0344d0, 0xbe79e497, 0x0e5084aa, + 0x5d503448, 0xed795475, 0x3d03f432, 0x8d2a940f, 0x9df6b4bd, + 0x2ddfd480, 0xfda574c7, 0x4d8c14fa, 0x9c1b4478, 0x2c322445, + 0xfc488402, 0x4c61e43f, 0x5cbdc48d, 0xec94a4b0, 0x3cee04f7, + 0x8cc764ca}, + {0x00000000, 0xa5d35ccb, 0x0ba1c84d, 0xae729486, 0x1642919b, + 0xb391cd50, 0x1de359d6, 0xb830051d, 0x6d8253ec, 0xc8510f27, + 0x66239ba1, 0xc3f0c76a, 0x7bc0c277, 0xde139ebc, 0x70610a3a, + 0xd5b256f1, 0x9b02d603, 0x3ed18ac8, 0x90a31e4e, 0x35704285, + 0x8d404798, 0x28931b53, 0x86e18fd5, 0x2332d31e, 0xf68085ef, + 0x5353d924, 0xfd214da2, 0x58f21169, 0xe0c21474, 0x451148bf, + 0xeb63dc39, 0x4eb080f2, 0x3605ac07, 0x93d6f0cc, 0x3da4644a, + 0x98773881, 0x20473d9c, 0x85946157, 0x2be6f5d1, 0x8e35a91a, + 
0x5b87ffeb, 0xfe54a320, 0x502637a6, 0xf5f56b6d, 0x4dc56e70, + 0xe81632bb, 0x4664a63d, 0xe3b7faf6, 0xad077a04, 0x08d426cf, + 0xa6a6b249, 0x0375ee82, 0xbb45eb9f, 0x1e96b754, 0xb0e423d2, + 0x15377f19, 0xc08529e8, 0x65567523, 0xcb24e1a5, 0x6ef7bd6e, + 0xd6c7b873, 0x7314e4b8, 0xdd66703e, 0x78b52cf5, 0x6c0a580f, + 0xc9d904c4, 0x67ab9042, 0xc278cc89, 0x7a48c994, 0xdf9b955f, + 0x71e901d9, 0xd43a5d12, 0x01880be3, 0xa45b5728, 0x0a29c3ae, + 0xaffa9f65, 0x17ca9a78, 0xb219c6b3, 0x1c6b5235, 0xb9b80efe, + 0xf7088e0c, 0x52dbd2c7, 0xfca94641, 0x597a1a8a, 0xe14a1f97, + 0x4499435c, 0xeaebd7da, 0x4f388b11, 0x9a8adde0, 0x3f59812b, + 0x912b15ad, 0x34f84966, 0x8cc84c7b, 0x291b10b0, 0x87698436, + 0x22bad8fd, 0x5a0ff408, 0xffdca8c3, 0x51ae3c45, 0xf47d608e, + 0x4c4d6593, 0xe99e3958, 0x47ecadde, 0xe23ff115, 0x378da7e4, + 0x925efb2f, 0x3c2c6fa9, 0x99ff3362, 0x21cf367f, 0x841c6ab4, + 0x2a6efe32, 0x8fbda2f9, 0xc10d220b, 0x64de7ec0, 0xcaacea46, + 0x6f7fb68d, 0xd74fb390, 0x729cef5b, 0xdcee7bdd, 0x793d2716, + 0xac8f71e7, 0x095c2d2c, 0xa72eb9aa, 0x02fde561, 0xbacde07c, + 0x1f1ebcb7, 0xb16c2831, 0x14bf74fa, 0xd814b01e, 0x7dc7ecd5, + 0xd3b57853, 0x76662498, 0xce562185, 0x6b857d4e, 0xc5f7e9c8, + 0x6024b503, 0xb596e3f2, 0x1045bf39, 0xbe372bbf, 0x1be47774, + 0xa3d47269, 0x06072ea2, 0xa875ba24, 0x0da6e6ef, 0x4316661d, + 0xe6c53ad6, 0x48b7ae50, 0xed64f29b, 0x5554f786, 0xf087ab4d, + 0x5ef53fcb, 0xfb266300, 0x2e9435f1, 0x8b47693a, 0x2535fdbc, + 0x80e6a177, 0x38d6a46a, 0x9d05f8a1, 0x33776c27, 0x96a430ec, + 0xee111c19, 0x4bc240d2, 0xe5b0d454, 0x4063889f, 0xf8538d82, + 0x5d80d149, 0xf3f245cf, 0x56211904, 0x83934ff5, 0x2640133e, + 0x883287b8, 0x2de1db73, 0x95d1de6e, 0x300282a5, 0x9e701623, + 0x3ba34ae8, 0x7513ca1a, 0xd0c096d1, 0x7eb20257, 0xdb615e9c, + 0x63515b81, 0xc682074a, 0x68f093cc, 0xcd23cf07, 0x189199f6, + 0xbd42c53d, 0x133051bb, 0xb6e30d70, 0x0ed3086d, 0xab0054a6, + 0x0572c020, 0xa0a19ceb, 0xb41ee811, 0x11cdb4da, 0xbfbf205c, + 0x1a6c7c97, 0xa25c798a, 0x078f2541, 0xa9fdb1c7, 0x0c2eed0c, + 0xd99cbbfd, 0x7c4fe736, 0xd23d73b0, 0x77ee2f7b, 0xcfde2a66, + 0x6a0d76ad, 0xc47fe22b, 0x61acbee0, 0x2f1c3e12, 0x8acf62d9, + 0x24bdf65f, 0x816eaa94, 0x395eaf89, 0x9c8df342, 0x32ff67c4, + 0x972c3b0f, 0x429e6dfe, 0xe74d3135, 0x493fa5b3, 0xececf978, + 0x54dcfc65, 0xf10fa0ae, 0x5f7d3428, 0xfaae68e3, 0x821b4416, + 0x27c818dd, 0x89ba8c5b, 0x2c69d090, 0x9459d58d, 0x318a8946, + 0x9ff81dc0, 0x3a2b410b, 0xef9917fa, 0x4a4a4b31, 0xe438dfb7, + 0x41eb837c, 0xf9db8661, 0x5c08daaa, 0xf27a4e2c, 0x57a912e7, + 0x19199215, 0xbccacede, 0x12b85a58, 0xb76b0693, 0x0f5b038e, + 0xaa885f45, 0x04facbc3, 0xa1299708, 0x749bc1f9, 0xd1489d32, + 0x7f3a09b4, 0xdae9557f, 0x62d95062, 0xc70a0ca9, 0x6978982f, + 0xccabc4e4}, + {0x00000000, 0xb40b77a6, 0x29119f97, 0x9d1ae831, 0x13244ff4, + 0xa72f3852, 0x3a35d063, 0x8e3ea7c5, 0x674eef33, 0xd3459895, + 0x4e5f70a4, 0xfa540702, 0x746aa0c7, 0xc061d761, 0x5d7b3f50, + 0xe97048f6, 0xce9cde67, 0x7a97a9c1, 0xe78d41f0, 0x53863656, + 0xddb89193, 0x69b3e635, 0xf4a90e04, 0x40a279a2, 0xa9d23154, + 0x1dd946f2, 0x80c3aec3, 0x34c8d965, 0xbaf67ea0, 0x0efd0906, + 0x93e7e137, 0x27ec9691, 0x9c39bdcf, 0x2832ca69, 0xb5282258, + 0x012355fe, 0x8f1df23b, 0x3b16859d, 0xa60c6dac, 0x12071a0a, + 0xfb7752fc, 0x4f7c255a, 0xd266cd6b, 0x666dbacd, 0xe8531d08, + 0x5c586aae, 0xc142829f, 0x7549f539, 0x52a563a8, 0xe6ae140e, + 0x7bb4fc3f, 0xcfbf8b99, 0x41812c5c, 0xf58a5bfa, 0x6890b3cb, + 0xdc9bc46d, 0x35eb8c9b, 0x81e0fb3d, 0x1cfa130c, 0xa8f164aa, + 0x26cfc36f, 0x92c4b4c9, 0x0fde5cf8, 0xbbd52b5e, 0x79750b44, + 0xcd7e7ce2, 0x506494d3, 0xe46fe375, 0x6a5144b0, 0xde5a3316, + 
0x4340db27, 0xf74bac81, 0x1e3be477, 0xaa3093d1, 0x372a7be0, + 0x83210c46, 0x0d1fab83, 0xb914dc25, 0x240e3414, 0x900543b2, + 0xb7e9d523, 0x03e2a285, 0x9ef84ab4, 0x2af33d12, 0xa4cd9ad7, + 0x10c6ed71, 0x8ddc0540, 0x39d772e6, 0xd0a73a10, 0x64ac4db6, + 0xf9b6a587, 0x4dbdd221, 0xc38375e4, 0x77880242, 0xea92ea73, + 0x5e999dd5, 0xe54cb68b, 0x5147c12d, 0xcc5d291c, 0x78565eba, + 0xf668f97f, 0x42638ed9, 0xdf7966e8, 0x6b72114e, 0x820259b8, + 0x36092e1e, 0xab13c62f, 0x1f18b189, 0x9126164c, 0x252d61ea, + 0xb83789db, 0x0c3cfe7d, 0x2bd068ec, 0x9fdb1f4a, 0x02c1f77b, + 0xb6ca80dd, 0x38f42718, 0x8cff50be, 0x11e5b88f, 0xa5eecf29, + 0x4c9e87df, 0xf895f079, 0x658f1848, 0xd1846fee, 0x5fbac82b, + 0xebb1bf8d, 0x76ab57bc, 0xc2a0201a, 0xf2ea1688, 0x46e1612e, + 0xdbfb891f, 0x6ff0feb9, 0xe1ce597c, 0x55c52eda, 0xc8dfc6eb, + 0x7cd4b14d, 0x95a4f9bb, 0x21af8e1d, 0xbcb5662c, 0x08be118a, + 0x8680b64f, 0x328bc1e9, 0xaf9129d8, 0x1b9a5e7e, 0x3c76c8ef, + 0x887dbf49, 0x15675778, 0xa16c20de, 0x2f52871b, 0x9b59f0bd, + 0x0643188c, 0xb2486f2a, 0x5b3827dc, 0xef33507a, 0x7229b84b, + 0xc622cfed, 0x481c6828, 0xfc171f8e, 0x610df7bf, 0xd5068019, + 0x6ed3ab47, 0xdad8dce1, 0x47c234d0, 0xf3c94376, 0x7df7e4b3, + 0xc9fc9315, 0x54e67b24, 0xe0ed0c82, 0x099d4474, 0xbd9633d2, + 0x208cdbe3, 0x9487ac45, 0x1ab90b80, 0xaeb27c26, 0x33a89417, + 0x87a3e3b1, 0xa04f7520, 0x14440286, 0x895eeab7, 0x3d559d11, + 0xb36b3ad4, 0x07604d72, 0x9a7aa543, 0x2e71d2e5, 0xc7019a13, + 0x730aedb5, 0xee100584, 0x5a1b7222, 0xd425d5e7, 0x602ea241, + 0xfd344a70, 0x493f3dd6, 0x8b9f1dcc, 0x3f946a6a, 0xa28e825b, + 0x1685f5fd, 0x98bb5238, 0x2cb0259e, 0xb1aacdaf, 0x05a1ba09, + 0xecd1f2ff, 0x58da8559, 0xc5c06d68, 0x71cb1ace, 0xfff5bd0b, + 0x4bfecaad, 0xd6e4229c, 0x62ef553a, 0x4503c3ab, 0xf108b40d, + 0x6c125c3c, 0xd8192b9a, 0x56278c5f, 0xe22cfbf9, 0x7f3613c8, + 0xcb3d646e, 0x224d2c98, 0x96465b3e, 0x0b5cb30f, 0xbf57c4a9, + 0x3169636c, 0x856214ca, 0x1878fcfb, 0xac738b5d, 0x17a6a003, + 0xa3add7a5, 0x3eb73f94, 0x8abc4832, 0x0482eff7, 0xb0899851, + 0x2d937060, 0x999807c6, 0x70e84f30, 0xc4e33896, 0x59f9d0a7, + 0xedf2a701, 0x63cc00c4, 0xd7c77762, 0x4add9f53, 0xfed6e8f5, + 0xd93a7e64, 0x6d3109c2, 0xf02be1f3, 0x44209655, 0xca1e3190, + 0x7e154636, 0xe30fae07, 0x5704d9a1, 0xbe749157, 0x0a7fe6f1, + 0x97650ec0, 0x236e7966, 0xad50dea3, 0x195ba905, 0x84414134, + 0x304a3692}, + {0x00000000, 0x9e00aacc, 0x7d072542, 0xe3078f8e, 0xfa0e4a84, + 0x640ee048, 0x87096fc6, 0x1909c50a, 0xb51be5d3, 0x2b1b4f1f, + 0xc81cc091, 0x561c6a5d, 0x4f15af57, 0xd115059b, 0x32128a15, + 0xac1220d9, 0x2b31bb7c, 0xb53111b0, 0x56369e3e, 0xc83634f2, + 0xd13ff1f8, 0x4f3f5b34, 0xac38d4ba, 0x32387e76, 0x9e2a5eaf, + 0x002af463, 0xe32d7bed, 0x7d2dd121, 0x6424142b, 0xfa24bee7, + 0x19233169, 0x87239ba5, 0x566276f9, 0xc862dc35, 0x2b6553bb, + 0xb565f977, 0xac6c3c7d, 0x326c96b1, 0xd16b193f, 0x4f6bb3f3, + 0xe379932a, 0x7d7939e6, 0x9e7eb668, 0x007e1ca4, 0x1977d9ae, + 0x87777362, 0x6470fcec, 0xfa705620, 0x7d53cd85, 0xe3536749, + 0x0054e8c7, 0x9e54420b, 0x875d8701, 0x195d2dcd, 0xfa5aa243, + 0x645a088f, 0xc8482856, 0x5648829a, 0xb54f0d14, 0x2b4fa7d8, + 0x324662d2, 0xac46c81e, 0x4f414790, 0xd141ed5c, 0xedc29d29, + 0x73c237e5, 0x90c5b86b, 0x0ec512a7, 0x17ccd7ad, 0x89cc7d61, + 0x6acbf2ef, 0xf4cb5823, 0x58d978fa, 0xc6d9d236, 0x25de5db8, + 0xbbdef774, 0xa2d7327e, 0x3cd798b2, 0xdfd0173c, 0x41d0bdf0, + 0xc6f32655, 0x58f38c99, 0xbbf40317, 0x25f4a9db, 0x3cfd6cd1, + 0xa2fdc61d, 0x41fa4993, 0xdffae35f, 0x73e8c386, 0xede8694a, + 0x0eefe6c4, 0x90ef4c08, 0x89e68902, 0x17e623ce, 0xf4e1ac40, + 0x6ae1068c, 0xbba0ebd0, 0x25a0411c, 0xc6a7ce92, 0x58a7645e, + 
0x41aea154, 0xdfae0b98, 0x3ca98416, 0xa2a92eda, 0x0ebb0e03, + 0x90bba4cf, 0x73bc2b41, 0xedbc818d, 0xf4b54487, 0x6ab5ee4b, + 0x89b261c5, 0x17b2cb09, 0x909150ac, 0x0e91fa60, 0xed9675ee, + 0x7396df22, 0x6a9f1a28, 0xf49fb0e4, 0x17983f6a, 0x899895a6, + 0x258ab57f, 0xbb8a1fb3, 0x588d903d, 0xc68d3af1, 0xdf84fffb, + 0x41845537, 0xa283dab9, 0x3c837075, 0xda853b53, 0x4485919f, + 0xa7821e11, 0x3982b4dd, 0x208b71d7, 0xbe8bdb1b, 0x5d8c5495, + 0xc38cfe59, 0x6f9ede80, 0xf19e744c, 0x1299fbc2, 0x8c99510e, + 0x95909404, 0x0b903ec8, 0xe897b146, 0x76971b8a, 0xf1b4802f, + 0x6fb42ae3, 0x8cb3a56d, 0x12b30fa1, 0x0bbacaab, 0x95ba6067, + 0x76bdefe9, 0xe8bd4525, 0x44af65fc, 0xdaafcf30, 0x39a840be, + 0xa7a8ea72, 0xbea12f78, 0x20a185b4, 0xc3a60a3a, 0x5da6a0f6, + 0x8ce74daa, 0x12e7e766, 0xf1e068e8, 0x6fe0c224, 0x76e9072e, + 0xe8e9ade2, 0x0bee226c, 0x95ee88a0, 0x39fca879, 0xa7fc02b5, + 0x44fb8d3b, 0xdafb27f7, 0xc3f2e2fd, 0x5df24831, 0xbef5c7bf, + 0x20f56d73, 0xa7d6f6d6, 0x39d65c1a, 0xdad1d394, 0x44d17958, + 0x5dd8bc52, 0xc3d8169e, 0x20df9910, 0xbedf33dc, 0x12cd1305, + 0x8ccdb9c9, 0x6fca3647, 0xf1ca9c8b, 0xe8c35981, 0x76c3f34d, + 0x95c47cc3, 0x0bc4d60f, 0x3747a67a, 0xa9470cb6, 0x4a408338, + 0xd44029f4, 0xcd49ecfe, 0x53494632, 0xb04ec9bc, 0x2e4e6370, + 0x825c43a9, 0x1c5ce965, 0xff5b66eb, 0x615bcc27, 0x7852092d, + 0xe652a3e1, 0x05552c6f, 0x9b5586a3, 0x1c761d06, 0x8276b7ca, + 0x61713844, 0xff719288, 0xe6785782, 0x7878fd4e, 0x9b7f72c0, + 0x057fd80c, 0xa96df8d5, 0x376d5219, 0xd46add97, 0x4a6a775b, + 0x5363b251, 0xcd63189d, 0x2e649713, 0xb0643ddf, 0x6125d083, + 0xff257a4f, 0x1c22f5c1, 0x82225f0d, 0x9b2b9a07, 0x052b30cb, + 0xe62cbf45, 0x782c1589, 0xd43e3550, 0x4a3e9f9c, 0xa9391012, + 0x3739bade, 0x2e307fd4, 0xb030d518, 0x53375a96, 0xcd37f05a, + 0x4a146bff, 0xd414c133, 0x37134ebd, 0xa913e471, 0xb01a217b, + 0x2e1a8bb7, 0xcd1d0439, 0x531daef5, 0xff0f8e2c, 0x610f24e0, + 0x8208ab6e, 0x1c0801a2, 0x0501c4a8, 0x9b016e64, 0x7806e1ea, + 0xe6064b26}}; + +#endif + +#endif + +#if N == 3 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 
0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 
0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, + 0x32240a4d, 0x250c6e8d, 
0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 
0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}, + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 
0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 
0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 
0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 0x38284a05, 0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x43cba68700000000, 0xc7903cd400000000, + 0x845b9a5300000000, 0xcf27087300000000, 0x8cecaef400000000, + 0x08b734a700000000, 0x4b7c922000000000, 0x9e4f10e600000000, + 0xdd84b66100000000, 0x59df2c3200000000, 0x1a148ab500000000, + 0x5168189500000000, 0x12a3be1200000000, 0x96f8244100000000, + 0xd53382c600000000, 0x7d99511700000000, 0x3e52f79000000000, + 0xba096dc300000000, 0xf9c2cb4400000000, 0xb2be596400000000, + 0xf175ffe300000000, 0x752e65b000000000, 0x36e5c33700000000, + 0xe3d641f100000000, 0xa01de77600000000, 0x24467d2500000000, + 0x678ddba200000000, 0x2cf1498200000000, 0x6f3aef0500000000, + 0xeb61755600000000, 0xa8aad3d100000000, 0xfa32a32e00000000, + 0xb9f905a900000000, 0x3da29ffa00000000, 0x7e69397d00000000, + 0x3515ab5d00000000, 0x76de0dda00000000, 0xf285978900000000, + 0xb14e310e00000000, 0x647db3c800000000, 0x27b6154f00000000, + 
0xa3ed8f1c00000000, 0xe026299b00000000, 0xab5abbbb00000000, + 0xe8911d3c00000000, 0x6cca876f00000000, 0x2f0121e800000000, + 0x87abf23900000000, 0xc46054be00000000, 0x403bceed00000000, + 0x03f0686a00000000, 0x488cfa4a00000000, 0x0b475ccd00000000, + 0x8f1cc69e00000000, 0xccd7601900000000, 0x19e4e2df00000000, + 0x5a2f445800000000, 0xde74de0b00000000, 0x9dbf788c00000000, + 0xd6c3eaac00000000, 0x95084c2b00000000, 0x1153d67800000000, + 0x529870ff00000000, 0xf465465d00000000, 0xb7aee0da00000000, + 0x33f57a8900000000, 0x703edc0e00000000, 0x3b424e2e00000000, + 0x7889e8a900000000, 0xfcd272fa00000000, 0xbf19d47d00000000, + 0x6a2a56bb00000000, 0x29e1f03c00000000, 0xadba6a6f00000000, + 0xee71cce800000000, 0xa50d5ec800000000, 0xe6c6f84f00000000, + 0x629d621c00000000, 0x2156c49b00000000, 0x89fc174a00000000, + 0xca37b1cd00000000, 0x4e6c2b9e00000000, 0x0da78d1900000000, + 0x46db1f3900000000, 0x0510b9be00000000, 0x814b23ed00000000, + 0xc280856a00000000, 0x17b307ac00000000, 0x5478a12b00000000, + 0xd0233b7800000000, 0x93e89dff00000000, 0xd8940fdf00000000, + 0x9b5fa95800000000, 0x1f04330b00000000, 0x5ccf958c00000000, + 0x0e57e57300000000, 0x4d9c43f400000000, 0xc9c7d9a700000000, + 0x8a0c7f2000000000, 0xc170ed0000000000, 0x82bb4b8700000000, + 0x06e0d1d400000000, 0x452b775300000000, 0x9018f59500000000, + 0xd3d3531200000000, 0x5788c94100000000, 0x14436fc600000000, + 0x5f3ffde600000000, 0x1cf45b6100000000, 0x98afc13200000000, + 0xdb6467b500000000, 0x73ceb46400000000, 0x300512e300000000, + 0xb45e88b000000000, 0xf7952e3700000000, 0xbce9bc1700000000, + 0xff221a9000000000, 0x7b7980c300000000, 0x38b2264400000000, + 0xed81a48200000000, 0xae4a020500000000, 0x2a11985600000000, + 0x69da3ed100000000, 0x22a6acf100000000, 0x616d0a7600000000, + 0xe536902500000000, 0xa6fd36a200000000, 0xe8cb8cba00000000, + 0xab002a3d00000000, 0x2f5bb06e00000000, 0x6c9016e900000000, + 0x27ec84c900000000, 0x6427224e00000000, 0xe07cb81d00000000, + 0xa3b71e9a00000000, 0x76849c5c00000000, 0x354f3adb00000000, + 0xb114a08800000000, 0xf2df060f00000000, 0xb9a3942f00000000, + 0xfa6832a800000000, 0x7e33a8fb00000000, 0x3df80e7c00000000, + 0x9552ddad00000000, 0xd6997b2a00000000, 0x52c2e17900000000, + 0x110947fe00000000, 0x5a75d5de00000000, 0x19be735900000000, + 0x9de5e90a00000000, 0xde2e4f8d00000000, 0x0b1dcd4b00000000, + 0x48d66bcc00000000, 0xcc8df19f00000000, 0x8f46571800000000, + 0xc43ac53800000000, 0x87f163bf00000000, 0x03aaf9ec00000000, + 0x40615f6b00000000, 0x12f92f9400000000, 0x5132891300000000, + 0xd569134000000000, 0x96a2b5c700000000, 0xddde27e700000000, + 0x9e15816000000000, 0x1a4e1b3300000000, 0x5985bdb400000000, + 0x8cb63f7200000000, 0xcf7d99f500000000, 0x4b2603a600000000, + 0x08eda52100000000, 0x4391370100000000, 0x005a918600000000, + 0x84010bd500000000, 0xc7caad5200000000, 0x6f607e8300000000, + 0x2cabd80400000000, 0xa8f0425700000000, 0xeb3be4d000000000, + 0xa04776f000000000, 0xe38cd07700000000, 0x67d74a2400000000, + 0x241ceca300000000, 0xf12f6e6500000000, 0xb2e4c8e200000000, + 0x36bf52b100000000, 0x7574f43600000000, 0x3e08661600000000, + 0x7dc3c09100000000, 0xf9985ac200000000, 0xba53fc4500000000, + 0x1caecae700000000, 0x5f656c6000000000, 0xdb3ef63300000000, + 0x98f550b400000000, 0xd389c29400000000, 0x9042641300000000, + 0x1419fe4000000000, 0x57d258c700000000, 0x82e1da0100000000, + 0xc12a7c8600000000, 0x4571e6d500000000, 0x06ba405200000000, + 0x4dc6d27200000000, 0x0e0d74f500000000, 0x8a56eea600000000, + 0xc99d482100000000, 0x61379bf000000000, 0x22fc3d7700000000, + 0xa6a7a72400000000, 0xe56c01a300000000, 0xae10938300000000, + 0xeddb350400000000, 
0x6980af5700000000, 0x2a4b09d000000000, + 0xff788b1600000000, 0xbcb32d9100000000, 0x38e8b7c200000000, + 0x7b23114500000000, 0x305f836500000000, 0x739425e200000000, + 0xf7cfbfb100000000, 0xb404193600000000, 0xe69c69c900000000, + 0xa557cf4e00000000, 0x210c551d00000000, 0x62c7f39a00000000, + 0x29bb61ba00000000, 0x6a70c73d00000000, 0xee2b5d6e00000000, + 0xade0fbe900000000, 0x78d3792f00000000, 0x3b18dfa800000000, + 0xbf4345fb00000000, 0xfc88e37c00000000, 0xb7f4715c00000000, + 0xf43fd7db00000000, 0x70644d8800000000, 0x33afeb0f00000000, + 0x9b0538de00000000, 0xd8ce9e5900000000, 0x5c95040a00000000, + 0x1f5ea28d00000000, 0x542230ad00000000, 0x17e9962a00000000, + 0x93b20c7900000000, 0xd079aafe00000000, 0x054a283800000000, + 0x46818ebf00000000, 0xc2da14ec00000000, 0x8111b26b00000000, + 0xca6d204b00000000, 0x89a686cc00000000, 0x0dfd1c9f00000000, + 0x4e36ba1800000000}, + {0x0000000000000000, 0xe1b652ef00000000, 0x836bd40500000000, + 0x62dd86ea00000000, 0x06d7a80b00000000, 0xe761fae400000000, + 0x85bc7c0e00000000, 0x640a2ee100000000, 0x0cae511700000000, + 0xed1803f800000000, 0x8fc5851200000000, 0x6e73d7fd00000000, + 0x0a79f91c00000000, 0xebcfabf300000000, 0x89122d1900000000, + 0x68a47ff600000000, 0x185ca32e00000000, 0xf9eaf1c100000000, + 0x9b37772b00000000, 0x7a8125c400000000, 0x1e8b0b2500000000, + 0xff3d59ca00000000, 0x9de0df2000000000, 0x7c568dcf00000000, + 0x14f2f23900000000, 0xf544a0d600000000, 0x9799263c00000000, + 0x762f74d300000000, 0x12255a3200000000, 0xf39308dd00000000, + 0x914e8e3700000000, 0x70f8dcd800000000, 0x30b8465d00000000, + 0xd10e14b200000000, 0xb3d3925800000000, 0x5265c0b700000000, + 0x366fee5600000000, 0xd7d9bcb900000000, 0xb5043a5300000000, + 0x54b268bc00000000, 0x3c16174a00000000, 0xdda045a500000000, + 0xbf7dc34f00000000, 0x5ecb91a000000000, 0x3ac1bf4100000000, + 0xdb77edae00000000, 0xb9aa6b4400000000, 0x581c39ab00000000, + 0x28e4e57300000000, 0xc952b79c00000000, 0xab8f317600000000, + 0x4a39639900000000, 0x2e334d7800000000, 0xcf851f9700000000, + 0xad58997d00000000, 0x4ceecb9200000000, 0x244ab46400000000, + 0xc5fce68b00000000, 0xa721606100000000, 0x4697328e00000000, + 0x229d1c6f00000000, 0xc32b4e8000000000, 0xa1f6c86a00000000, + 0x40409a8500000000, 0x60708dba00000000, 0x81c6df5500000000, + 0xe31b59bf00000000, 0x02ad0b5000000000, 0x66a725b100000000, + 0x8711775e00000000, 0xe5ccf1b400000000, 0x047aa35b00000000, + 0x6cdedcad00000000, 0x8d688e4200000000, 0xefb508a800000000, + 0x0e035a4700000000, 0x6a0974a600000000, 0x8bbf264900000000, + 0xe962a0a300000000, 0x08d4f24c00000000, 0x782c2e9400000000, + 0x999a7c7b00000000, 0xfb47fa9100000000, 0x1af1a87e00000000, + 0x7efb869f00000000, 0x9f4dd47000000000, 0xfd90529a00000000, + 0x1c26007500000000, 0x74827f8300000000, 0x95342d6c00000000, + 0xf7e9ab8600000000, 0x165ff96900000000, 0x7255d78800000000, + 0x93e3856700000000, 0xf13e038d00000000, 0x1088516200000000, + 0x50c8cbe700000000, 0xb17e990800000000, 0xd3a31fe200000000, + 0x32154d0d00000000, 0x561f63ec00000000, 0xb7a9310300000000, + 0xd574b7e900000000, 0x34c2e50600000000, 0x5c669af000000000, + 0xbdd0c81f00000000, 0xdf0d4ef500000000, 0x3ebb1c1a00000000, + 0x5ab132fb00000000, 0xbb07601400000000, 0xd9dae6fe00000000, + 0x386cb41100000000, 0x489468c900000000, 0xa9223a2600000000, + 0xcbffbccc00000000, 0x2a49ee2300000000, 0x4e43c0c200000000, + 0xaff5922d00000000, 0xcd2814c700000000, 0x2c9e462800000000, + 0x443a39de00000000, 0xa58c6b3100000000, 0xc751eddb00000000, + 0x26e7bf3400000000, 0x42ed91d500000000, 0xa35bc33a00000000, + 0xc18645d000000000, 0x2030173f00000000, 0x81e66bae00000000, + 
0x6050394100000000, 0x028dbfab00000000, 0xe33bed4400000000, + 0x8731c3a500000000, 0x6687914a00000000, 0x045a17a000000000, + 0xe5ec454f00000000, 0x8d483ab900000000, 0x6cfe685600000000, + 0x0e23eebc00000000, 0xef95bc5300000000, 0x8b9f92b200000000, + 0x6a29c05d00000000, 0x08f446b700000000, 0xe942145800000000, + 0x99bac88000000000, 0x780c9a6f00000000, 0x1ad11c8500000000, + 0xfb674e6a00000000, 0x9f6d608b00000000, 0x7edb326400000000, + 0x1c06b48e00000000, 0xfdb0e66100000000, 0x9514999700000000, + 0x74a2cb7800000000, 0x167f4d9200000000, 0xf7c91f7d00000000, + 0x93c3319c00000000, 0x7275637300000000, 0x10a8e59900000000, + 0xf11eb77600000000, 0xb15e2df300000000, 0x50e87f1c00000000, + 0x3235f9f600000000, 0xd383ab1900000000, 0xb78985f800000000, + 0x563fd71700000000, 0x34e251fd00000000, 0xd554031200000000, + 0xbdf07ce400000000, 0x5c462e0b00000000, 0x3e9ba8e100000000, + 0xdf2dfa0e00000000, 0xbb27d4ef00000000, 0x5a91860000000000, + 0x384c00ea00000000, 0xd9fa520500000000, 0xa9028edd00000000, + 0x48b4dc3200000000, 0x2a695ad800000000, 0xcbdf083700000000, + 0xafd526d600000000, 0x4e63743900000000, 0x2cbef2d300000000, + 0xcd08a03c00000000, 0xa5acdfca00000000, 0x441a8d2500000000, + 0x26c70bcf00000000, 0xc771592000000000, 0xa37b77c100000000, + 0x42cd252e00000000, 0x2010a3c400000000, 0xc1a6f12b00000000, + 0xe196e61400000000, 0x0020b4fb00000000, 0x62fd321100000000, + 0x834b60fe00000000, 0xe7414e1f00000000, 0x06f71cf000000000, + 0x642a9a1a00000000, 0x859cc8f500000000, 0xed38b70300000000, + 0x0c8ee5ec00000000, 0x6e53630600000000, 0x8fe531e900000000, + 0xebef1f0800000000, 0x0a594de700000000, 0x6884cb0d00000000, + 0x893299e200000000, 0xf9ca453a00000000, 0x187c17d500000000, + 0x7aa1913f00000000, 0x9b17c3d000000000, 0xff1ded3100000000, + 0x1eabbfde00000000, 0x7c76393400000000, 0x9dc06bdb00000000, + 0xf564142d00000000, 0x14d246c200000000, 0x760fc02800000000, + 0x97b992c700000000, 0xf3b3bc2600000000, 0x1205eec900000000, + 0x70d8682300000000, 0x916e3acc00000000, 0xd12ea04900000000, + 0x3098f2a600000000, 0x5245744c00000000, 0xb3f326a300000000, + 0xd7f9084200000000, 0x364f5aad00000000, 0x5492dc4700000000, + 0xb5248ea800000000, 0xdd80f15e00000000, 0x3c36a3b100000000, + 0x5eeb255b00000000, 0xbf5d77b400000000, 0xdb57595500000000, + 0x3ae10bba00000000, 0x583c8d5000000000, 0xb98adfbf00000000, + 0xc972036700000000, 0x28c4518800000000, 0x4a19d76200000000, + 0xabaf858d00000000, 0xcfa5ab6c00000000, 0x2e13f98300000000, + 0x4cce7f6900000000, 0xad782d8600000000, 0xc5dc527000000000, + 0x246a009f00000000, 0x46b7867500000000, 0xa701d49a00000000, + 0xc30bfa7b00000000, 0x22bda89400000000, 0x40602e7e00000000, + 0xa1d67c9100000000}, + {0x0000000000000000, 0x5880e2d700000000, 0xf106b47400000000, + 0xa98656a300000000, 0xe20d68e900000000, 0xba8d8a3e00000000, + 0x130bdc9d00000000, 0x4b8b3e4a00000000, 0x851da10900000000, + 0xdd9d43de00000000, 0x741b157d00000000, 0x2c9bf7aa00000000, + 0x6710c9e000000000, 0x3f902b3700000000, 0x96167d9400000000, + 0xce969f4300000000, 0x0a3b421300000000, 0x52bba0c400000000, + 0xfb3df66700000000, 0xa3bd14b000000000, 0xe8362afa00000000, + 0xb0b6c82d00000000, 0x19309e8e00000000, 0x41b07c5900000000, + 0x8f26e31a00000000, 0xd7a601cd00000000, 0x7e20576e00000000, + 0x26a0b5b900000000, 0x6d2b8bf300000000, 0x35ab692400000000, + 0x9c2d3f8700000000, 0xc4addd5000000000, 0x1476842600000000, + 0x4cf666f100000000, 0xe570305200000000, 0xbdf0d28500000000, + 0xf67beccf00000000, 0xaefb0e1800000000, 0x077d58bb00000000, + 0x5ffdba6c00000000, 0x916b252f00000000, 0xc9ebc7f800000000, + 0x606d915b00000000, 0x38ed738c00000000, 
0x73664dc600000000, + 0x2be6af1100000000, 0x8260f9b200000000, 0xdae01b6500000000, + 0x1e4dc63500000000, 0x46cd24e200000000, 0xef4b724100000000, + 0xb7cb909600000000, 0xfc40aedc00000000, 0xa4c04c0b00000000, + 0x0d461aa800000000, 0x55c6f87f00000000, 0x9b50673c00000000, + 0xc3d085eb00000000, 0x6a56d34800000000, 0x32d6319f00000000, + 0x795d0fd500000000, 0x21dded0200000000, 0x885bbba100000000, + 0xd0db597600000000, 0x28ec084d00000000, 0x706cea9a00000000, + 0xd9eabc3900000000, 0x816a5eee00000000, 0xcae160a400000000, + 0x9261827300000000, 0x3be7d4d000000000, 0x6367360700000000, + 0xadf1a94400000000, 0xf5714b9300000000, 0x5cf71d3000000000, + 0x0477ffe700000000, 0x4ffcc1ad00000000, 0x177c237a00000000, + 0xbefa75d900000000, 0xe67a970e00000000, 0x22d74a5e00000000, + 0x7a57a88900000000, 0xd3d1fe2a00000000, 0x8b511cfd00000000, + 0xc0da22b700000000, 0x985ac06000000000, 0x31dc96c300000000, + 0x695c741400000000, 0xa7caeb5700000000, 0xff4a098000000000, + 0x56cc5f2300000000, 0x0e4cbdf400000000, 0x45c783be00000000, + 0x1d47616900000000, 0xb4c137ca00000000, 0xec41d51d00000000, + 0x3c9a8c6b00000000, 0x641a6ebc00000000, 0xcd9c381f00000000, + 0x951cdac800000000, 0xde97e48200000000, 0x8617065500000000, + 0x2f9150f600000000, 0x7711b22100000000, 0xb9872d6200000000, + 0xe107cfb500000000, 0x4881991600000000, 0x10017bc100000000, + 0x5b8a458b00000000, 0x030aa75c00000000, 0xaa8cf1ff00000000, + 0xf20c132800000000, 0x36a1ce7800000000, 0x6e212caf00000000, + 0xc7a77a0c00000000, 0x9f2798db00000000, 0xd4aca69100000000, + 0x8c2c444600000000, 0x25aa12e500000000, 0x7d2af03200000000, + 0xb3bc6f7100000000, 0xeb3c8da600000000, 0x42badb0500000000, + 0x1a3a39d200000000, 0x51b1079800000000, 0x0931e54f00000000, + 0xa0b7b3ec00000000, 0xf837513b00000000, 0x50d8119a00000000, + 0x0858f34d00000000, 0xa1dea5ee00000000, 0xf95e473900000000, + 0xb2d5797300000000, 0xea559ba400000000, 0x43d3cd0700000000, + 0x1b532fd000000000, 0xd5c5b09300000000, 0x8d45524400000000, + 0x24c304e700000000, 0x7c43e63000000000, 0x37c8d87a00000000, + 0x6f483aad00000000, 0xc6ce6c0e00000000, 0x9e4e8ed900000000, + 0x5ae3538900000000, 0x0263b15e00000000, 0xabe5e7fd00000000, + 0xf365052a00000000, 0xb8ee3b6000000000, 0xe06ed9b700000000, + 0x49e88f1400000000, 0x11686dc300000000, 0xdffef28000000000, + 0x877e105700000000, 0x2ef846f400000000, 0x7678a42300000000, + 0x3df39a6900000000, 0x657378be00000000, 0xccf52e1d00000000, + 0x9475ccca00000000, 0x44ae95bc00000000, 0x1c2e776b00000000, + 0xb5a821c800000000, 0xed28c31f00000000, 0xa6a3fd5500000000, + 0xfe231f8200000000, 0x57a5492100000000, 0x0f25abf600000000, + 0xc1b334b500000000, 0x9933d66200000000, 0x30b580c100000000, + 0x6835621600000000, 0x23be5c5c00000000, 0x7b3ebe8b00000000, + 0xd2b8e82800000000, 0x8a380aff00000000, 0x4e95d7af00000000, + 0x1615357800000000, 0xbf9363db00000000, 0xe713810c00000000, + 0xac98bf4600000000, 0xf4185d9100000000, 0x5d9e0b3200000000, + 0x051ee9e500000000, 0xcb8876a600000000, 0x9308947100000000, + 0x3a8ec2d200000000, 0x620e200500000000, 0x29851e4f00000000, + 0x7105fc9800000000, 0xd883aa3b00000000, 0x800348ec00000000, + 0x783419d700000000, 0x20b4fb0000000000, 0x8932ada300000000, + 0xd1b24f7400000000, 0x9a39713e00000000, 0xc2b993e900000000, + 0x6b3fc54a00000000, 0x33bf279d00000000, 0xfd29b8de00000000, + 0xa5a95a0900000000, 0x0c2f0caa00000000, 0x54afee7d00000000, + 0x1f24d03700000000, 0x47a432e000000000, 0xee22644300000000, + 0xb6a2869400000000, 0x720f5bc400000000, 0x2a8fb91300000000, + 0x8309efb000000000, 0xdb890d6700000000, 0x9002332d00000000, + 0xc882d1fa00000000, 0x6104875900000000, 0x3984658e00000000, 
+ 0xf712facd00000000, 0xaf92181a00000000, 0x06144eb900000000, + 0x5e94ac6e00000000, 0x151f922400000000, 0x4d9f70f300000000, + 0xe419265000000000, 0xbc99c48700000000, 0x6c429df100000000, + 0x34c27f2600000000, 0x9d44298500000000, 0xc5c4cb5200000000, + 0x8e4ff51800000000, 0xd6cf17cf00000000, 0x7f49416c00000000, + 0x27c9a3bb00000000, 0xe95f3cf800000000, 0xb1dfde2f00000000, + 0x1859888c00000000, 0x40d96a5b00000000, 0x0b52541100000000, + 0x53d2b6c600000000, 0xfa54e06500000000, 0xa2d402b200000000, + 0x6679dfe200000000, 0x3ef93d3500000000, 0x977f6b9600000000, + 0xcfff894100000000, 0x8474b70b00000000, 0xdcf455dc00000000, + 0x7572037f00000000, 0x2df2e1a800000000, 0xe3647eeb00000000, + 0xbbe49c3c00000000, 0x1262ca9f00000000, 0x4ae2284800000000, + 0x0169160200000000, 0x59e9f4d500000000, 0xf06fa27600000000, + 0xa8ef40a100000000}, + {0x0000000000000000, 0x463b676500000000, 0x8c76ceca00000000, + 0xca4da9af00000000, 0x59ebed4e00000000, 0x1fd08a2b00000000, + 0xd59d238400000000, 0x93a644e100000000, 0xb2d6db9d00000000, + 0xf4edbcf800000000, 0x3ea0155700000000, 0x789b723200000000, + 0xeb3d36d300000000, 0xad0651b600000000, 0x674bf81900000000, + 0x21709f7c00000000, 0x25abc6e000000000, 0x6390a18500000000, + 0xa9dd082a00000000, 0xefe66f4f00000000, 0x7c402bae00000000, + 0x3a7b4ccb00000000, 0xf036e56400000000, 0xb60d820100000000, + 0x977d1d7d00000000, 0xd1467a1800000000, 0x1b0bd3b700000000, + 0x5d30b4d200000000, 0xce96f03300000000, 0x88ad975600000000, + 0x42e03ef900000000, 0x04db599c00000000, 0x0b50fc1a00000000, + 0x4d6b9b7f00000000, 0x872632d000000000, 0xc11d55b500000000, + 0x52bb115400000000, 0x1480763100000000, 0xdecddf9e00000000, + 0x98f6b8fb00000000, 0xb986278700000000, 0xffbd40e200000000, + 0x35f0e94d00000000, 0x73cb8e2800000000, 0xe06dcac900000000, + 0xa656adac00000000, 0x6c1b040300000000, 0x2a20636600000000, + 0x2efb3afa00000000, 0x68c05d9f00000000, 0xa28df43000000000, + 0xe4b6935500000000, 0x7710d7b400000000, 0x312bb0d100000000, + 0xfb66197e00000000, 0xbd5d7e1b00000000, 0x9c2de16700000000, + 0xda16860200000000, 0x105b2fad00000000, 0x566048c800000000, + 0xc5c60c2900000000, 0x83fd6b4c00000000, 0x49b0c2e300000000, + 0x0f8ba58600000000, 0x16a0f83500000000, 0x509b9f5000000000, + 0x9ad636ff00000000, 0xdced519a00000000, 0x4f4b157b00000000, + 0x0970721e00000000, 0xc33ddbb100000000, 0x8506bcd400000000, + 0xa47623a800000000, 0xe24d44cd00000000, 0x2800ed6200000000, + 0x6e3b8a0700000000, 0xfd9dcee600000000, 0xbba6a98300000000, + 0x71eb002c00000000, 0x37d0674900000000, 0x330b3ed500000000, + 0x753059b000000000, 0xbf7df01f00000000, 0xf946977a00000000, + 0x6ae0d39b00000000, 0x2cdbb4fe00000000, 0xe6961d5100000000, + 0xa0ad7a3400000000, 0x81dde54800000000, 0xc7e6822d00000000, + 0x0dab2b8200000000, 0x4b904ce700000000, 0xd836080600000000, + 0x9e0d6f6300000000, 0x5440c6cc00000000, 0x127ba1a900000000, + 0x1df0042f00000000, 0x5bcb634a00000000, 0x9186cae500000000, + 0xd7bdad8000000000, 0x441be96100000000, 0x02208e0400000000, + 0xc86d27ab00000000, 0x8e5640ce00000000, 0xaf26dfb200000000, + 0xe91db8d700000000, 0x2350117800000000, 0x656b761d00000000, + 0xf6cd32fc00000000, 0xb0f6559900000000, 0x7abbfc3600000000, + 0x3c809b5300000000, 0x385bc2cf00000000, 0x7e60a5aa00000000, + 0xb42d0c0500000000, 0xf2166b6000000000, 0x61b02f8100000000, + 0x278b48e400000000, 0xedc6e14b00000000, 0xabfd862e00000000, + 0x8a8d195200000000, 0xccb67e3700000000, 0x06fbd79800000000, + 0x40c0b0fd00000000, 0xd366f41c00000000, 0x955d937900000000, + 0x5f103ad600000000, 0x192b5db300000000, 0x2c40f16b00000000, + 0x6a7b960e00000000, 0xa0363fa100000000, 
0xe60d58c400000000, + 0x75ab1c2500000000, 0x33907b4000000000, 0xf9ddd2ef00000000, + 0xbfe6b58a00000000, 0x9e962af600000000, 0xd8ad4d9300000000, + 0x12e0e43c00000000, 0x54db835900000000, 0xc77dc7b800000000, + 0x8146a0dd00000000, 0x4b0b097200000000, 0x0d306e1700000000, + 0x09eb378b00000000, 0x4fd050ee00000000, 0x859df94100000000, + 0xc3a69e2400000000, 0x5000dac500000000, 0x163bbda000000000, + 0xdc76140f00000000, 0x9a4d736a00000000, 0xbb3dec1600000000, + 0xfd068b7300000000, 0x374b22dc00000000, 0x717045b900000000, + 0xe2d6015800000000, 0xa4ed663d00000000, 0x6ea0cf9200000000, + 0x289ba8f700000000, 0x27100d7100000000, 0x612b6a1400000000, + 0xab66c3bb00000000, 0xed5da4de00000000, 0x7efbe03f00000000, + 0x38c0875a00000000, 0xf28d2ef500000000, 0xb4b6499000000000, + 0x95c6d6ec00000000, 0xd3fdb18900000000, 0x19b0182600000000, + 0x5f8b7f4300000000, 0xcc2d3ba200000000, 0x8a165cc700000000, + 0x405bf56800000000, 0x0660920d00000000, 0x02bbcb9100000000, + 0x4480acf400000000, 0x8ecd055b00000000, 0xc8f6623e00000000, + 0x5b5026df00000000, 0x1d6b41ba00000000, 0xd726e81500000000, + 0x911d8f7000000000, 0xb06d100c00000000, 0xf656776900000000, + 0x3c1bdec600000000, 0x7a20b9a300000000, 0xe986fd4200000000, + 0xafbd9a2700000000, 0x65f0338800000000, 0x23cb54ed00000000, + 0x3ae0095e00000000, 0x7cdb6e3b00000000, 0xb696c79400000000, + 0xf0ada0f100000000, 0x630be41000000000, 0x2530837500000000, + 0xef7d2ada00000000, 0xa9464dbf00000000, 0x8836d2c300000000, + 0xce0db5a600000000, 0x04401c0900000000, 0x427b7b6c00000000, + 0xd1dd3f8d00000000, 0x97e658e800000000, 0x5dabf14700000000, + 0x1b90962200000000, 0x1f4bcfbe00000000, 0x5970a8db00000000, + 0x933d017400000000, 0xd506661100000000, 0x46a022f000000000, + 0x009b459500000000, 0xcad6ec3a00000000, 0x8ced8b5f00000000, + 0xad9d142300000000, 0xeba6734600000000, 0x21ebdae900000000, + 0x67d0bd8c00000000, 0xf476f96d00000000, 0xb24d9e0800000000, + 0x780037a700000000, 0x3e3b50c200000000, 0x31b0f54400000000, + 0x778b922100000000, 0xbdc63b8e00000000, 0xfbfd5ceb00000000, + 0x685b180a00000000, 0x2e607f6f00000000, 0xe42dd6c000000000, + 0xa216b1a500000000, 0x83662ed900000000, 0xc55d49bc00000000, + 0x0f10e01300000000, 0x492b877600000000, 0xda8dc39700000000, + 0x9cb6a4f200000000, 0x56fb0d5d00000000, 0x10c06a3800000000, + 0x141b33a400000000, 0x522054c100000000, 0x986dfd6e00000000, + 0xde569a0b00000000, 0x4df0deea00000000, 0x0bcbb98f00000000, + 0xc186102000000000, 0x87bd774500000000, 0xa6cde83900000000, + 0xe0f68f5c00000000, 0x2abb26f300000000, 0x6c80419600000000, + 0xff26057700000000, 0xb91d621200000000, 0x7350cbbd00000000, + 0x356bacd800000000}, + {0x0000000000000000, 0x9e83da9f00000000, 0x7d01c4e400000000, + 0xe3821e7b00000000, 0xbb04f91200000000, 0x2587238d00000000, + 0xc6053df600000000, 0x5886e76900000000, 0x7609f22500000000, + 0xe88a28ba00000000, 0x0b0836c100000000, 0x958bec5e00000000, + 0xcd0d0b3700000000, 0x538ed1a800000000, 0xb00ccfd300000000, + 0x2e8f154c00000000, 0xec12e44b00000000, 0x72913ed400000000, + 0x911320af00000000, 0x0f90fa3000000000, 0x57161d5900000000, + 0xc995c7c600000000, 0x2a17d9bd00000000, 0xb494032200000000, + 0x9a1b166e00000000, 0x0498ccf100000000, 0xe71ad28a00000000, + 0x7999081500000000, 0x211fef7c00000000, 0xbf9c35e300000000, + 0x5c1e2b9800000000, 0xc29df10700000000, 0xd825c89700000000, + 0x46a6120800000000, 0xa5240c7300000000, 0x3ba7d6ec00000000, + 0x6321318500000000, 0xfda2eb1a00000000, 0x1e20f56100000000, + 0x80a32ffe00000000, 0xae2c3ab200000000, 0x30afe02d00000000, + 0xd32dfe5600000000, 0x4dae24c900000000, 0x1528c3a000000000, + 0x8bab193f00000000, 
0x6829074400000000, 0xf6aadddb00000000, + 0x34372cdc00000000, 0xaab4f64300000000, 0x4936e83800000000, + 0xd7b532a700000000, 0x8f33d5ce00000000, 0x11b00f5100000000, + 0xf232112a00000000, 0x6cb1cbb500000000, 0x423edef900000000, + 0xdcbd046600000000, 0x3f3f1a1d00000000, 0xa1bcc08200000000, + 0xf93a27eb00000000, 0x67b9fd7400000000, 0x843be30f00000000, + 0x1ab8399000000000, 0xf14de1f400000000, 0x6fce3b6b00000000, + 0x8c4c251000000000, 0x12cfff8f00000000, 0x4a4918e600000000, + 0xd4cac27900000000, 0x3748dc0200000000, 0xa9cb069d00000000, + 0x874413d100000000, 0x19c7c94e00000000, 0xfa45d73500000000, + 0x64c60daa00000000, 0x3c40eac300000000, 0xa2c3305c00000000, + 0x41412e2700000000, 0xdfc2f4b800000000, 0x1d5f05bf00000000, + 0x83dcdf2000000000, 0x605ec15b00000000, 0xfedd1bc400000000, + 0xa65bfcad00000000, 0x38d8263200000000, 0xdb5a384900000000, + 0x45d9e2d600000000, 0x6b56f79a00000000, 0xf5d52d0500000000, + 0x1657337e00000000, 0x88d4e9e100000000, 0xd0520e8800000000, + 0x4ed1d41700000000, 0xad53ca6c00000000, 0x33d010f300000000, + 0x2968296300000000, 0xb7ebf3fc00000000, 0x5469ed8700000000, + 0xcaea371800000000, 0x926cd07100000000, 0x0cef0aee00000000, + 0xef6d149500000000, 0x71eece0a00000000, 0x5f61db4600000000, + 0xc1e201d900000000, 0x22601fa200000000, 0xbce3c53d00000000, + 0xe465225400000000, 0x7ae6f8cb00000000, 0x9964e6b000000000, + 0x07e73c2f00000000, 0xc57acd2800000000, 0x5bf917b700000000, + 0xb87b09cc00000000, 0x26f8d35300000000, 0x7e7e343a00000000, + 0xe0fdeea500000000, 0x037ff0de00000000, 0x9dfc2a4100000000, + 0xb3733f0d00000000, 0x2df0e59200000000, 0xce72fbe900000000, + 0x50f1217600000000, 0x0877c61f00000000, 0x96f41c8000000000, + 0x757602fb00000000, 0xebf5d86400000000, 0xa39db33200000000, + 0x3d1e69ad00000000, 0xde9c77d600000000, 0x401fad4900000000, + 0x18994a2000000000, 0x861a90bf00000000, 0x65988ec400000000, + 0xfb1b545b00000000, 0xd594411700000000, 0x4b179b8800000000, + 0xa89585f300000000, 0x36165f6c00000000, 0x6e90b80500000000, + 0xf013629a00000000, 0x13917ce100000000, 0x8d12a67e00000000, + 0x4f8f577900000000, 0xd10c8de600000000, 0x328e939d00000000, + 0xac0d490200000000, 0xf48bae6b00000000, 0x6a0874f400000000, + 0x898a6a8f00000000, 0x1709b01000000000, 0x3986a55c00000000, + 0xa7057fc300000000, 0x448761b800000000, 0xda04bb2700000000, + 0x82825c4e00000000, 0x1c0186d100000000, 0xff8398aa00000000, + 0x6100423500000000, 0x7bb87ba500000000, 0xe53ba13a00000000, + 0x06b9bf4100000000, 0x983a65de00000000, 0xc0bc82b700000000, + 0x5e3f582800000000, 0xbdbd465300000000, 0x233e9ccc00000000, + 0x0db1898000000000, 0x9332531f00000000, 0x70b04d6400000000, + 0xee3397fb00000000, 0xb6b5709200000000, 0x2836aa0d00000000, + 0xcbb4b47600000000, 0x55376ee900000000, 0x97aa9fee00000000, + 0x0929457100000000, 0xeaab5b0a00000000, 0x7428819500000000, + 0x2cae66fc00000000, 0xb22dbc6300000000, 0x51afa21800000000, + 0xcf2c788700000000, 0xe1a36dcb00000000, 0x7f20b75400000000, + 0x9ca2a92f00000000, 0x022173b000000000, 0x5aa794d900000000, + 0xc4244e4600000000, 0x27a6503d00000000, 0xb9258aa200000000, + 0x52d052c600000000, 0xcc53885900000000, 0x2fd1962200000000, + 0xb1524cbd00000000, 0xe9d4abd400000000, 0x7757714b00000000, + 0x94d56f3000000000, 0x0a56b5af00000000, 0x24d9a0e300000000, + 0xba5a7a7c00000000, 0x59d8640700000000, 0xc75bbe9800000000, + 0x9fdd59f100000000, 0x015e836e00000000, 0xe2dc9d1500000000, + 0x7c5f478a00000000, 0xbec2b68d00000000, 0x20416c1200000000, + 0xc3c3726900000000, 0x5d40a8f600000000, 0x05c64f9f00000000, + 0x9b45950000000000, 0x78c78b7b00000000, 0xe64451e400000000, + 0xc8cb44a800000000, 0x56489e3700000000, 
0xb5ca804c00000000, + 0x2b495ad300000000, 0x73cfbdba00000000, 0xed4c672500000000, + 0x0ece795e00000000, 0x904da3c100000000, 0x8af59a5100000000, + 0x147640ce00000000, 0xf7f45eb500000000, 0x6977842a00000000, + 0x31f1634300000000, 0xaf72b9dc00000000, 0x4cf0a7a700000000, + 0xd2737d3800000000, 0xfcfc687400000000, 0x627fb2eb00000000, + 0x81fdac9000000000, 0x1f7e760f00000000, 0x47f8916600000000, + 0xd97b4bf900000000, 0x3af9558200000000, 0xa47a8f1d00000000, + 0x66e77e1a00000000, 0xf864a48500000000, 0x1be6bafe00000000, + 0x8565606100000000, 0xdde3870800000000, 0x43605d9700000000, + 0xa0e243ec00000000, 0x3e61997300000000, 0x10ee8c3f00000000, + 0x8e6d56a000000000, 0x6def48db00000000, 0xf36c924400000000, + 0xabea752d00000000, 0x3569afb200000000, 0xd6ebb1c900000000, + 0x48686b5600000000}, + {0x0000000000000000, 0xc064281700000000, 0x80c9502e00000000, + 0x40ad783900000000, 0x0093a15c00000000, 0xc0f7894b00000000, + 0x805af17200000000, 0x403ed96500000000, 0x002643b900000000, + 0xc0426bae00000000, 0x80ef139700000000, 0x408b3b8000000000, + 0x00b5e2e500000000, 0xc0d1caf200000000, 0x807cb2cb00000000, + 0x40189adc00000000, 0x414af7a900000000, 0x812edfbe00000000, + 0xc183a78700000000, 0x01e78f9000000000, 0x41d956f500000000, + 0x81bd7ee200000000, 0xc11006db00000000, 0x01742ecc00000000, + 0x416cb41000000000, 0x81089c0700000000, 0xc1a5e43e00000000, + 0x01c1cc2900000000, 0x41ff154c00000000, 0x819b3d5b00000000, + 0xc136456200000000, 0x01526d7500000000, 0xc3929f8800000000, + 0x03f6b79f00000000, 0x435bcfa600000000, 0x833fe7b100000000, + 0xc3013ed400000000, 0x036516c300000000, 0x43c86efa00000000, + 0x83ac46ed00000000, 0xc3b4dc3100000000, 0x03d0f42600000000, + 0x437d8c1f00000000, 0x8319a40800000000, 0xc3277d6d00000000, + 0x0343557a00000000, 0x43ee2d4300000000, 0x838a055400000000, + 0x82d8682100000000, 0x42bc403600000000, 0x0211380f00000000, + 0xc275101800000000, 0x824bc97d00000000, 0x422fe16a00000000, + 0x0282995300000000, 0xc2e6b14400000000, 0x82fe2b9800000000, + 0x429a038f00000000, 0x02377bb600000000, 0xc25353a100000000, + 0x826d8ac400000000, 0x4209a2d300000000, 0x02a4daea00000000, + 0xc2c0f2fd00000000, 0xc7234eca00000000, 0x074766dd00000000, + 0x47ea1ee400000000, 0x878e36f300000000, 0xc7b0ef9600000000, + 0x07d4c78100000000, 0x4779bfb800000000, 0x871d97af00000000, + 0xc7050d7300000000, 0x0761256400000000, 0x47cc5d5d00000000, + 0x87a8754a00000000, 0xc796ac2f00000000, 0x07f2843800000000, + 0x475ffc0100000000, 0x873bd41600000000, 0x8669b96300000000, + 0x460d917400000000, 0x06a0e94d00000000, 0xc6c4c15a00000000, + 0x86fa183f00000000, 0x469e302800000000, 0x0633481100000000, + 0xc657600600000000, 0x864ffada00000000, 0x462bd2cd00000000, + 0x0686aaf400000000, 0xc6e282e300000000, 0x86dc5b8600000000, + 0x46b8739100000000, 0x06150ba800000000, 0xc67123bf00000000, + 0x04b1d14200000000, 0xc4d5f95500000000, 0x8478816c00000000, + 0x441ca97b00000000, 0x0422701e00000000, 0xc446580900000000, + 0x84eb203000000000, 0x448f082700000000, 0x049792fb00000000, + 0xc4f3baec00000000, 0x845ec2d500000000, 0x443aeac200000000, + 0x040433a700000000, 0xc4601bb000000000, 0x84cd638900000000, + 0x44a94b9e00000000, 0x45fb26eb00000000, 0x859f0efc00000000, + 0xc53276c500000000, 0x05565ed200000000, 0x456887b700000000, + 0x850cafa000000000, 0xc5a1d79900000000, 0x05c5ff8e00000000, + 0x45dd655200000000, 0x85b94d4500000000, 0xc514357c00000000, + 0x05701d6b00000000, 0x454ec40e00000000, 0x852aec1900000000, + 0xc587942000000000, 0x05e3bc3700000000, 0xcf41ed4f00000000, + 0x0f25c55800000000, 0x4f88bd6100000000, 0x8fec957600000000, + 0xcfd24c1300000000, 
0x0fb6640400000000, 0x4f1b1c3d00000000, + 0x8f7f342a00000000, 0xcf67aef600000000, 0x0f0386e100000000, + 0x4faefed800000000, 0x8fcad6cf00000000, 0xcff40faa00000000, + 0x0f9027bd00000000, 0x4f3d5f8400000000, 0x8f59779300000000, + 0x8e0b1ae600000000, 0x4e6f32f100000000, 0x0ec24ac800000000, + 0xcea662df00000000, 0x8e98bbba00000000, 0x4efc93ad00000000, + 0x0e51eb9400000000, 0xce35c38300000000, 0x8e2d595f00000000, + 0x4e49714800000000, 0x0ee4097100000000, 0xce80216600000000, + 0x8ebef80300000000, 0x4edad01400000000, 0x0e77a82d00000000, + 0xce13803a00000000, 0x0cd372c700000000, 0xccb75ad000000000, + 0x8c1a22e900000000, 0x4c7e0afe00000000, 0x0c40d39b00000000, + 0xcc24fb8c00000000, 0x8c8983b500000000, 0x4cedaba200000000, + 0x0cf5317e00000000, 0xcc91196900000000, 0x8c3c615000000000, + 0x4c58494700000000, 0x0c66902200000000, 0xcc02b83500000000, + 0x8cafc00c00000000, 0x4ccbe81b00000000, 0x4d99856e00000000, + 0x8dfdad7900000000, 0xcd50d54000000000, 0x0d34fd5700000000, + 0x4d0a243200000000, 0x8d6e0c2500000000, 0xcdc3741c00000000, + 0x0da75c0b00000000, 0x4dbfc6d700000000, 0x8ddbeec000000000, + 0xcd7696f900000000, 0x0d12beee00000000, 0x4d2c678b00000000, + 0x8d484f9c00000000, 0xcde537a500000000, 0x0d811fb200000000, + 0x0862a38500000000, 0xc8068b9200000000, 0x88abf3ab00000000, + 0x48cfdbbc00000000, 0x08f102d900000000, 0xc8952ace00000000, + 0x883852f700000000, 0x485c7ae000000000, 0x0844e03c00000000, + 0xc820c82b00000000, 0x888db01200000000, 0x48e9980500000000, + 0x08d7416000000000, 0xc8b3697700000000, 0x881e114e00000000, + 0x487a395900000000, 0x4928542c00000000, 0x894c7c3b00000000, + 0xc9e1040200000000, 0x09852c1500000000, 0x49bbf57000000000, + 0x89dfdd6700000000, 0xc972a55e00000000, 0x09168d4900000000, + 0x490e179500000000, 0x896a3f8200000000, 0xc9c747bb00000000, + 0x09a36fac00000000, 0x499db6c900000000, 0x89f99ede00000000, + 0xc954e6e700000000, 0x0930cef000000000, 0xcbf03c0d00000000, + 0x0b94141a00000000, 0x4b396c2300000000, 0x8b5d443400000000, + 0xcb639d5100000000, 0x0b07b54600000000, 0x4baacd7f00000000, + 0x8bcee56800000000, 0xcbd67fb400000000, 0x0bb257a300000000, + 0x4b1f2f9a00000000, 0x8b7b078d00000000, 0xcb45dee800000000, + 0x0b21f6ff00000000, 0x4b8c8ec600000000, 0x8be8a6d100000000, + 0x8abacba400000000, 0x4adee3b300000000, 0x0a739b8a00000000, + 0xca17b39d00000000, 0x8a296af800000000, 0x4a4d42ef00000000, + 0x0ae03ad600000000, 0xca8412c100000000, 0x8a9c881d00000000, + 0x4af8a00a00000000, 0x0a55d83300000000, 0xca31f02400000000, + 0x8a0f294100000000, 0x4a6b015600000000, 0x0ac6796f00000000, + 0xcaa2517800000000}, + {0x0000000000000000, 0xd4ea739b00000000, 0xe9d396ed00000000, + 0x3d39e57600000000, 0x93a15c0000000000, 0x474b2f9b00000000, + 0x7a72caed00000000, 0xae98b97600000000, 0x2643b90000000000, + 0xf2a9ca9b00000000, 0xcf902fed00000000, 0x1b7a5c7600000000, + 0xb5e2e50000000000, 0x6108969b00000000, 0x5c3173ed00000000, + 0x88db007600000000, 0x4c86720100000000, 0x986c019a00000000, + 0xa555e4ec00000000, 0x71bf977700000000, 0xdf272e0100000000, + 0x0bcd5d9a00000000, 0x36f4b8ec00000000, 0xe21ecb7700000000, + 0x6ac5cb0100000000, 0xbe2fb89a00000000, 0x83165dec00000000, + 0x57fc2e7700000000, 0xf964970100000000, 0x2d8ee49a00000000, + 0x10b701ec00000000, 0xc45d727700000000, 0x980ce50200000000, + 0x4ce6969900000000, 0x71df73ef00000000, 0xa535007400000000, + 0x0badb90200000000, 0xdf47ca9900000000, 0xe27e2fef00000000, + 0x36945c7400000000, 0xbe4f5c0200000000, 0x6aa52f9900000000, + 0x579ccaef00000000, 0x8376b97400000000, 0x2dee000200000000, + 0xf904739900000000, 0xc43d96ef00000000, 0x10d7e57400000000, + 
0xd48a970300000000, 0x0060e49800000000, 0x3d5901ee00000000, + 0xe9b3727500000000, 0x472bcb0300000000, 0x93c1b89800000000, + 0xaef85dee00000000, 0x7a122e7500000000, 0xf2c92e0300000000, + 0x26235d9800000000, 0x1b1ab8ee00000000, 0xcff0cb7500000000, + 0x6168720300000000, 0xb582019800000000, 0x88bbe4ee00000000, + 0x5c51977500000000, 0x3019ca0500000000, 0xe4f3b99e00000000, + 0xd9ca5ce800000000, 0x0d202f7300000000, 0xa3b8960500000000, + 0x7752e59e00000000, 0x4a6b00e800000000, 0x9e81737300000000, + 0x165a730500000000, 0xc2b0009e00000000, 0xff89e5e800000000, + 0x2b63967300000000, 0x85fb2f0500000000, 0x51115c9e00000000, + 0x6c28b9e800000000, 0xb8c2ca7300000000, 0x7c9fb80400000000, + 0xa875cb9f00000000, 0x954c2ee900000000, 0x41a65d7200000000, + 0xef3ee40400000000, 0x3bd4979f00000000, 0x06ed72e900000000, + 0xd207017200000000, 0x5adc010400000000, 0x8e36729f00000000, + 0xb30f97e900000000, 0x67e5e47200000000, 0xc97d5d0400000000, + 0x1d972e9f00000000, 0x20aecbe900000000, 0xf444b87200000000, + 0xa8152f0700000000, 0x7cff5c9c00000000, 0x41c6b9ea00000000, + 0x952cca7100000000, 0x3bb4730700000000, 0xef5e009c00000000, + 0xd267e5ea00000000, 0x068d967100000000, 0x8e56960700000000, + 0x5abce59c00000000, 0x678500ea00000000, 0xb36f737100000000, + 0x1df7ca0700000000, 0xc91db99c00000000, 0xf4245cea00000000, + 0x20ce2f7100000000, 0xe4935d0600000000, 0x30792e9d00000000, + 0x0d40cbeb00000000, 0xd9aab87000000000, 0x7732010600000000, + 0xa3d8729d00000000, 0x9ee197eb00000000, 0x4a0be47000000000, + 0xc2d0e40600000000, 0x163a979d00000000, 0x2b0372eb00000000, + 0xffe9017000000000, 0x5171b80600000000, 0x859bcb9d00000000, + 0xb8a22eeb00000000, 0x6c485d7000000000, 0x6032940b00000000, + 0xb4d8e79000000000, 0x89e102e600000000, 0x5d0b717d00000000, + 0xf393c80b00000000, 0x2779bb9000000000, 0x1a405ee600000000, + 0xceaa2d7d00000000, 0x46712d0b00000000, 0x929b5e9000000000, + 0xafa2bbe600000000, 0x7b48c87d00000000, 0xd5d0710b00000000, + 0x013a029000000000, 0x3c03e7e600000000, 0xe8e9947d00000000, + 0x2cb4e60a00000000, 0xf85e959100000000, 0xc56770e700000000, + 0x118d037c00000000, 0xbf15ba0a00000000, 0x6bffc99100000000, + 0x56c62ce700000000, 0x822c5f7c00000000, 0x0af75f0a00000000, + 0xde1d2c9100000000, 0xe324c9e700000000, 0x37ceba7c00000000, + 0x9956030a00000000, 0x4dbc709100000000, 0x708595e700000000, + 0xa46fe67c00000000, 0xf83e710900000000, 0x2cd4029200000000, + 0x11ede7e400000000, 0xc507947f00000000, 0x6b9f2d0900000000, + 0xbf755e9200000000, 0x824cbbe400000000, 0x56a6c87f00000000, + 0xde7dc80900000000, 0x0a97bb9200000000, 0x37ae5ee400000000, + 0xe3442d7f00000000, 0x4ddc940900000000, 0x9936e79200000000, + 0xa40f02e400000000, 0x70e5717f00000000, 0xb4b8030800000000, + 0x6052709300000000, 0x5d6b95e500000000, 0x8981e67e00000000, + 0x27195f0800000000, 0xf3f32c9300000000, 0xcecac9e500000000, + 0x1a20ba7e00000000, 0x92fbba0800000000, 0x4611c99300000000, + 0x7b282ce500000000, 0xafc25f7e00000000, 0x015ae60800000000, + 0xd5b0959300000000, 0xe88970e500000000, 0x3c63037e00000000, + 0x502b5e0e00000000, 0x84c12d9500000000, 0xb9f8c8e300000000, + 0x6d12bb7800000000, 0xc38a020e00000000, 0x1760719500000000, + 0x2a5994e300000000, 0xfeb3e77800000000, 0x7668e70e00000000, + 0xa282949500000000, 0x9fbb71e300000000, 0x4b51027800000000, + 0xe5c9bb0e00000000, 0x3123c89500000000, 0x0c1a2de300000000, + 0xd8f05e7800000000, 0x1cad2c0f00000000, 0xc8475f9400000000, + 0xf57ebae200000000, 0x2194c97900000000, 0x8f0c700f00000000, + 0x5be6039400000000, 0x66dfe6e200000000, 0xb235957900000000, + 0x3aee950f00000000, 0xee04e69400000000, 0xd33d03e200000000, + 0x07d7707900000000, 
0xa94fc90f00000000, 0x7da5ba9400000000, + 0x409c5fe200000000, 0x94762c7900000000, 0xc827bb0c00000000, + 0x1ccdc89700000000, 0x21f42de100000000, 0xf51e5e7a00000000, + 0x5b86e70c00000000, 0x8f6c949700000000, 0xb25571e100000000, + 0x66bf027a00000000, 0xee64020c00000000, 0x3a8e719700000000, + 0x07b794e100000000, 0xd35de77a00000000, 0x7dc55e0c00000000, + 0xa92f2d9700000000, 0x9416c8e100000000, 0x40fcbb7a00000000, + 0x84a1c90d00000000, 0x504bba9600000000, 0x6d725fe000000000, + 0xb9982c7b00000000, 0x1700950d00000000, 0xc3eae69600000000, + 0xfed303e000000000, 0x2a39707b00000000, 0xa2e2700d00000000, + 0x7608039600000000, 0x4b31e6e000000000, 0x9fdb957b00000000, + 0x31432c0d00000000, 0xe5a95f9600000000, 0xd890bae000000000, + 0x0c7ac97b00000000}, + {0x0000000000000000, 0x2765258100000000, 0x0fcc3bd900000000, + 0x28a91e5800000000, 0x5f9e066900000000, 0x78fb23e800000000, + 0x50523db000000000, 0x7737183100000000, 0xbe3c0dd200000000, + 0x9959285300000000, 0xb1f0360b00000000, 0x9695138a00000000, + 0xe1a20bbb00000000, 0xc6c72e3a00000000, 0xee6e306200000000, + 0xc90b15e300000000, 0x3d7f6b7f00000000, 0x1a1a4efe00000000, + 0x32b350a600000000, 0x15d6752700000000, 0x62e16d1600000000, + 0x4584489700000000, 0x6d2d56cf00000000, 0x4a48734e00000000, + 0x834366ad00000000, 0xa426432c00000000, 0x8c8f5d7400000000, + 0xabea78f500000000, 0xdcdd60c400000000, 0xfbb8454500000000, + 0xd3115b1d00000000, 0xf4747e9c00000000, 0x7afed6fe00000000, + 0x5d9bf37f00000000, 0x7532ed2700000000, 0x5257c8a600000000, + 0x2560d09700000000, 0x0205f51600000000, 0x2aaceb4e00000000, + 0x0dc9cecf00000000, 0xc4c2db2c00000000, 0xe3a7fead00000000, + 0xcb0ee0f500000000, 0xec6bc57400000000, 0x9b5cdd4500000000, + 0xbc39f8c400000000, 0x9490e69c00000000, 0xb3f5c31d00000000, + 0x4781bd8100000000, 0x60e4980000000000, 0x484d865800000000, + 0x6f28a3d900000000, 0x181fbbe800000000, 0x3f7a9e6900000000, + 0x17d3803100000000, 0x30b6a5b000000000, 0xf9bdb05300000000, + 0xded895d200000000, 0xf6718b8a00000000, 0xd114ae0b00000000, + 0xa623b63a00000000, 0x814693bb00000000, 0xa9ef8de300000000, + 0x8e8aa86200000000, 0xb5fadc2600000000, 0x929ff9a700000000, + 0xba36e7ff00000000, 0x9d53c27e00000000, 0xea64da4f00000000, + 0xcd01ffce00000000, 0xe5a8e19600000000, 0xc2cdc41700000000, + 0x0bc6d1f400000000, 0x2ca3f47500000000, 0x040aea2d00000000, + 0x236fcfac00000000, 0x5458d79d00000000, 0x733df21c00000000, + 0x5b94ec4400000000, 0x7cf1c9c500000000, 0x8885b75900000000, + 0xafe092d800000000, 0x87498c8000000000, 0xa02ca90100000000, + 0xd71bb13000000000, 0xf07e94b100000000, 0xd8d78ae900000000, + 0xffb2af6800000000, 0x36b9ba8b00000000, 0x11dc9f0a00000000, + 0x3975815200000000, 0x1e10a4d300000000, 0x6927bce200000000, + 0x4e42996300000000, 0x66eb873b00000000, 0x418ea2ba00000000, + 0xcf040ad800000000, 0xe8612f5900000000, 0xc0c8310100000000, + 0xe7ad148000000000, 0x909a0cb100000000, 0xb7ff293000000000, + 0x9f56376800000000, 0xb83312e900000000, 0x7138070a00000000, + 0x565d228b00000000, 0x7ef43cd300000000, 0x5991195200000000, + 0x2ea6016300000000, 0x09c324e200000000, 0x216a3aba00000000, + 0x060f1f3b00000000, 0xf27b61a700000000, 0xd51e442600000000, + 0xfdb75a7e00000000, 0xdad27fff00000000, 0xade567ce00000000, + 0x8a80424f00000000, 0xa2295c1700000000, 0x854c799600000000, + 0x4c476c7500000000, 0x6b2249f400000000, 0x438b57ac00000000, + 0x64ee722d00000000, 0x13d96a1c00000000, 0x34bc4f9d00000000, + 0x1c1551c500000000, 0x3b70744400000000, 0x6af5b94d00000000, + 0x4d909ccc00000000, 0x6539829400000000, 0x425ca71500000000, + 0x356bbf2400000000, 0x120e9aa500000000, 0x3aa784fd00000000, + 
0x1dc2a17c00000000, 0xd4c9b49f00000000, 0xf3ac911e00000000, + 0xdb058f4600000000, 0xfc60aac700000000, 0x8b57b2f600000000, + 0xac32977700000000, 0x849b892f00000000, 0xa3feacae00000000, + 0x578ad23200000000, 0x70eff7b300000000, 0x5846e9eb00000000, + 0x7f23cc6a00000000, 0x0814d45b00000000, 0x2f71f1da00000000, + 0x07d8ef8200000000, 0x20bdca0300000000, 0xe9b6dfe000000000, + 0xced3fa6100000000, 0xe67ae43900000000, 0xc11fc1b800000000, + 0xb628d98900000000, 0x914dfc0800000000, 0xb9e4e25000000000, + 0x9e81c7d100000000, 0x100b6fb300000000, 0x376e4a3200000000, + 0x1fc7546a00000000, 0x38a271eb00000000, 0x4f9569da00000000, + 0x68f04c5b00000000, 0x4059520300000000, 0x673c778200000000, + 0xae37626100000000, 0x895247e000000000, 0xa1fb59b800000000, + 0x869e7c3900000000, 0xf1a9640800000000, 0xd6cc418900000000, + 0xfe655fd100000000, 0xd9007a5000000000, 0x2d7404cc00000000, + 0x0a11214d00000000, 0x22b83f1500000000, 0x05dd1a9400000000, + 0x72ea02a500000000, 0x558f272400000000, 0x7d26397c00000000, + 0x5a431cfd00000000, 0x9348091e00000000, 0xb42d2c9f00000000, + 0x9c8432c700000000, 0xbbe1174600000000, 0xccd60f7700000000, + 0xebb32af600000000, 0xc31a34ae00000000, 0xe47f112f00000000, + 0xdf0f656b00000000, 0xf86a40ea00000000, 0xd0c35eb200000000, + 0xf7a67b3300000000, 0x8091630200000000, 0xa7f4468300000000, + 0x8f5d58db00000000, 0xa8387d5a00000000, 0x613368b900000000, + 0x46564d3800000000, 0x6eff536000000000, 0x499a76e100000000, + 0x3ead6ed000000000, 0x19c84b5100000000, 0x3161550900000000, + 0x1604708800000000, 0xe2700e1400000000, 0xc5152b9500000000, + 0xedbc35cd00000000, 0xcad9104c00000000, 0xbdee087d00000000, + 0x9a8b2dfc00000000, 0xb22233a400000000, 0x9547162500000000, + 0x5c4c03c600000000, 0x7b29264700000000, 0x5380381f00000000, + 0x74e51d9e00000000, 0x03d205af00000000, 0x24b7202e00000000, + 0x0c1e3e7600000000, 0x2b7b1bf700000000, 0xa5f1b39500000000, + 0x8294961400000000, 0xaa3d884c00000000, 0x8d58adcd00000000, + 0xfa6fb5fc00000000, 0xdd0a907d00000000, 0xf5a38e2500000000, + 0xd2c6aba400000000, 0x1bcdbe4700000000, 0x3ca89bc600000000, + 0x1401859e00000000, 0x3364a01f00000000, 0x4453b82e00000000, + 0x63369daf00000000, 0x4b9f83f700000000, 0x6cfaa67600000000, + 0x988ed8ea00000000, 0xbfebfd6b00000000, 0x9742e33300000000, + 0xb027c6b200000000, 0xc710de8300000000, 0xe075fb0200000000, + 0xc8dce55a00000000, 0xefb9c0db00000000, 0x26b2d53800000000, + 0x01d7f0b900000000, 0x297eeee100000000, 0x0e1bcb6000000000, + 0x792cd35100000000, 0x5e49f6d000000000, 0x76e0e88800000000, + 0x5185cd0900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x9ba54c6f, 0xec3b9e9f, 0x779ed2f0, 0x03063b7f, + 0x98a37710, 0xef3da5e0, 0x7498e98f, 0x060c76fe, 0x9da93a91, + 0xea37e861, 0x7192a40e, 0x050a4d81, 0x9eaf01ee, 0xe931d31e, + 0x72949f71, 0x0c18edfc, 0x97bda193, 0xe0237363, 0x7b863f0c, + 0x0f1ed683, 0x94bb9aec, 0xe325481c, 0x78800473, 0x0a149b02, + 0x91b1d76d, 0xe62f059d, 0x7d8a49f2, 0x0912a07d, 0x92b7ec12, + 0xe5293ee2, 0x7e8c728d, 0x1831dbf8, 0x83949797, 0xf40a4567, + 0x6faf0908, 0x1b37e087, 0x8092ace8, 0xf70c7e18, 0x6ca93277, + 0x1e3dad06, 0x8598e169, 0xf2063399, 0x69a37ff6, 0x1d3b9679, + 0x869eda16, 0xf10008e6, 0x6aa54489, 0x14293604, 0x8f8c7a6b, + 0xf812a89b, 0x63b7e4f4, 0x172f0d7b, 0x8c8a4114, 0xfb1493e4, + 0x60b1df8b, 0x122540fa, 0x89800c95, 0xfe1ede65, 0x65bb920a, + 0x11237b85, 0x8a8637ea, 0xfd18e51a, 0x66bda975, 0x3063b7f0, + 0xabc6fb9f, 0xdc58296f, 0x47fd6500, 0x33658c8f, 0xa8c0c0e0, + 0xdf5e1210, 0x44fb5e7f, 0x366fc10e, 0xadca8d61, 0xda545f91, + 0x41f113fe, 0x3569fa71, 0xaeccb61e, 
0xd95264ee, 0x42f72881, + 0x3c7b5a0c, 0xa7de1663, 0xd040c493, 0x4be588fc, 0x3f7d6173, + 0xa4d82d1c, 0xd346ffec, 0x48e3b383, 0x3a772cf2, 0xa1d2609d, + 0xd64cb26d, 0x4de9fe02, 0x3971178d, 0xa2d45be2, 0xd54a8912, + 0x4eefc57d, 0x28526c08, 0xb3f72067, 0xc469f297, 0x5fccbef8, + 0x2b545777, 0xb0f11b18, 0xc76fc9e8, 0x5cca8587, 0x2e5e1af6, + 0xb5fb5699, 0xc2658469, 0x59c0c806, 0x2d582189, 0xb6fd6de6, + 0xc163bf16, 0x5ac6f379, 0x244a81f4, 0xbfefcd9b, 0xc8711f6b, + 0x53d45304, 0x274cba8b, 0xbce9f6e4, 0xcb772414, 0x50d2687b, + 0x2246f70a, 0xb9e3bb65, 0xce7d6995, 0x55d825fa, 0x2140cc75, + 0xbae5801a, 0xcd7b52ea, 0x56de1e85, 0x60c76fe0, 0xfb62238f, + 0x8cfcf17f, 0x1759bd10, 0x63c1549f, 0xf86418f0, 0x8ffaca00, + 0x145f866f, 0x66cb191e, 0xfd6e5571, 0x8af08781, 0x1155cbee, + 0x65cd2261, 0xfe686e0e, 0x89f6bcfe, 0x1253f091, 0x6cdf821c, + 0xf77ace73, 0x80e41c83, 0x1b4150ec, 0x6fd9b963, 0xf47cf50c, + 0x83e227fc, 0x18476b93, 0x6ad3f4e2, 0xf176b88d, 0x86e86a7d, + 0x1d4d2612, 0x69d5cf9d, 0xf27083f2, 0x85ee5102, 0x1e4b1d6d, + 0x78f6b418, 0xe353f877, 0x94cd2a87, 0x0f6866e8, 0x7bf08f67, + 0xe055c308, 0x97cb11f8, 0x0c6e5d97, 0x7efac2e6, 0xe55f8e89, + 0x92c15c79, 0x09641016, 0x7dfcf999, 0xe659b5f6, 0x91c76706, + 0x0a622b69, 0x74ee59e4, 0xef4b158b, 0x98d5c77b, 0x03708b14, + 0x77e8629b, 0xec4d2ef4, 0x9bd3fc04, 0x0076b06b, 0x72e22f1a, + 0xe9476375, 0x9ed9b185, 0x057cfdea, 0x71e41465, 0xea41580a, + 0x9ddf8afa, 0x067ac695, 0x50a4d810, 0xcb01947f, 0xbc9f468f, + 0x273a0ae0, 0x53a2e36f, 0xc807af00, 0xbf997df0, 0x243c319f, + 0x56a8aeee, 0xcd0de281, 0xba933071, 0x21367c1e, 0x55ae9591, + 0xce0bd9fe, 0xb9950b0e, 0x22304761, 0x5cbc35ec, 0xc7197983, + 0xb087ab73, 0x2b22e71c, 0x5fba0e93, 0xc41f42fc, 0xb381900c, + 0x2824dc63, 0x5ab04312, 0xc1150f7d, 0xb68bdd8d, 0x2d2e91e2, + 0x59b6786d, 0xc2133402, 0xb58de6f2, 0x2e28aa9d, 0x489503e8, + 0xd3304f87, 0xa4ae9d77, 0x3f0bd118, 0x4b933897, 0xd03674f8, + 0xa7a8a608, 0x3c0dea67, 0x4e997516, 0xd53c3979, 0xa2a2eb89, + 0x3907a7e6, 0x4d9f4e69, 0xd63a0206, 0xa1a4d0f6, 0x3a019c99, + 0x448dee14, 0xdf28a27b, 0xa8b6708b, 0x33133ce4, 0x478bd56b, + 0xdc2e9904, 0xabb04bf4, 0x3015079b, 0x428198ea, 0xd924d485, + 0xaeba0675, 0x351f4a1a, 0x4187a395, 0xda22effa, 0xadbc3d0a, + 0x36197165}, + {0x00000000, 0xc18edfc0, 0x586cb9c1, 0x99e26601, 0xb0d97382, + 0x7157ac42, 0xe8b5ca43, 0x293b1583, 0xbac3e145, 0x7b4d3e85, + 0xe2af5884, 0x23218744, 0x0a1a92c7, 0xcb944d07, 0x52762b06, + 0x93f8f4c6, 0xaef6c4cb, 0x6f781b0b, 0xf69a7d0a, 0x3714a2ca, + 0x1e2fb749, 0xdfa16889, 0x46430e88, 0x87cdd148, 0x1435258e, + 0xd5bbfa4e, 0x4c599c4f, 0x8dd7438f, 0xa4ec560c, 0x656289cc, + 0xfc80efcd, 0x3d0e300d, 0x869c8fd7, 0x47125017, 0xdef03616, + 0x1f7ee9d6, 0x3645fc55, 0xf7cb2395, 0x6e294594, 0xafa79a54, + 0x3c5f6e92, 0xfdd1b152, 0x6433d753, 0xa5bd0893, 0x8c861d10, + 0x4d08c2d0, 0xd4eaa4d1, 0x15647b11, 0x286a4b1c, 0xe9e494dc, + 0x7006f2dd, 0xb1882d1d, 0x98b3389e, 0x593de75e, 0xc0df815f, + 0x01515e9f, 0x92a9aa59, 0x53277599, 0xcac51398, 0x0b4bcc58, + 0x2270d9db, 0xe3fe061b, 0x7a1c601a, 0xbb92bfda, 0xd64819ef, + 0x17c6c62f, 0x8e24a02e, 0x4faa7fee, 0x66916a6d, 0xa71fb5ad, + 0x3efdd3ac, 0xff730c6c, 0x6c8bf8aa, 0xad05276a, 0x34e7416b, + 0xf5699eab, 0xdc528b28, 0x1ddc54e8, 0x843e32e9, 0x45b0ed29, + 0x78bedd24, 0xb93002e4, 0x20d264e5, 0xe15cbb25, 0xc867aea6, + 0x09e97166, 0x900b1767, 0x5185c8a7, 0xc27d3c61, 0x03f3e3a1, + 0x9a1185a0, 0x5b9f5a60, 0x72a44fe3, 0xb32a9023, 0x2ac8f622, + 0xeb4629e2, 0x50d49638, 0x915a49f8, 0x08b82ff9, 0xc936f039, + 0xe00de5ba, 0x21833a7a, 0xb8615c7b, 0x79ef83bb, 0xea17777d, + 0x2b99a8bd, 0xb27bcebc, 0x73f5117c, 
0x5ace04ff, 0x9b40db3f, + 0x02a2bd3e, 0xc32c62fe, 0xfe2252f3, 0x3fac8d33, 0xa64eeb32, + 0x67c034f2, 0x4efb2171, 0x8f75feb1, 0x169798b0, 0xd7194770, + 0x44e1b3b6, 0x856f6c76, 0x1c8d0a77, 0xdd03d5b7, 0xf438c034, + 0x35b61ff4, 0xac5479f5, 0x6ddaa635, 0x77e1359f, 0xb66fea5f, + 0x2f8d8c5e, 0xee03539e, 0xc738461d, 0x06b699dd, 0x9f54ffdc, + 0x5eda201c, 0xcd22d4da, 0x0cac0b1a, 0x954e6d1b, 0x54c0b2db, + 0x7dfba758, 0xbc757898, 0x25971e99, 0xe419c159, 0xd917f154, + 0x18992e94, 0x817b4895, 0x40f59755, 0x69ce82d6, 0xa8405d16, + 0x31a23b17, 0xf02ce4d7, 0x63d41011, 0xa25acfd1, 0x3bb8a9d0, + 0xfa367610, 0xd30d6393, 0x1283bc53, 0x8b61da52, 0x4aef0592, + 0xf17dba48, 0x30f36588, 0xa9110389, 0x689fdc49, 0x41a4c9ca, + 0x802a160a, 0x19c8700b, 0xd846afcb, 0x4bbe5b0d, 0x8a3084cd, + 0x13d2e2cc, 0xd25c3d0c, 0xfb67288f, 0x3ae9f74f, 0xa30b914e, + 0x62854e8e, 0x5f8b7e83, 0x9e05a143, 0x07e7c742, 0xc6691882, + 0xef520d01, 0x2edcd2c1, 0xb73eb4c0, 0x76b06b00, 0xe5489fc6, + 0x24c64006, 0xbd242607, 0x7caaf9c7, 0x5591ec44, 0x941f3384, + 0x0dfd5585, 0xcc738a45, 0xa1a92c70, 0x6027f3b0, 0xf9c595b1, + 0x384b4a71, 0x11705ff2, 0xd0fe8032, 0x491ce633, 0x889239f3, + 0x1b6acd35, 0xdae412f5, 0x430674f4, 0x8288ab34, 0xabb3beb7, + 0x6a3d6177, 0xf3df0776, 0x3251d8b6, 0x0f5fe8bb, 0xced1377b, + 0x5733517a, 0x96bd8eba, 0xbf869b39, 0x7e0844f9, 0xe7ea22f8, + 0x2664fd38, 0xb59c09fe, 0x7412d63e, 0xedf0b03f, 0x2c7e6fff, + 0x05457a7c, 0xc4cba5bc, 0x5d29c3bd, 0x9ca71c7d, 0x2735a3a7, + 0xe6bb7c67, 0x7f591a66, 0xbed7c5a6, 0x97ecd025, 0x56620fe5, + 0xcf8069e4, 0x0e0eb624, 0x9df642e2, 0x5c789d22, 0xc59afb23, + 0x041424e3, 0x2d2f3160, 0xeca1eea0, 0x754388a1, 0xb4cd5761, + 0x89c3676c, 0x484db8ac, 0xd1afdead, 0x1021016d, 0x391a14ee, + 0xf894cb2e, 0x6176ad2f, 0xa0f872ef, 0x33008629, 0xf28e59e9, + 0x6b6c3fe8, 0xaae2e028, 0x83d9f5ab, 0x42572a6b, 0xdbb54c6a, + 0x1a3b93aa}, + {0x00000000, 0xefc26b3e, 0x04f5d03d, 0xeb37bb03, 0x09eba07a, + 0xe629cb44, 0x0d1e7047, 0xe2dc1b79, 0x13d740f4, 0xfc152bca, + 0x172290c9, 0xf8e0fbf7, 0x1a3ce08e, 0xf5fe8bb0, 0x1ec930b3, + 0xf10b5b8d, 0x27ae81e8, 0xc86cead6, 0x235b51d5, 0xcc993aeb, + 0x2e452192, 0xc1874aac, 0x2ab0f1af, 0xc5729a91, 0x3479c11c, + 0xdbbbaa22, 0x308c1121, 0xdf4e7a1f, 0x3d926166, 0xd2500a58, + 0x3967b15b, 0xd6a5da65, 0x4f5d03d0, 0xa09f68ee, 0x4ba8d3ed, + 0xa46ab8d3, 0x46b6a3aa, 0xa974c894, 0x42437397, 0xad8118a9, + 0x5c8a4324, 0xb348281a, 0x587f9319, 0xb7bdf827, 0x5561e35e, + 0xbaa38860, 0x51943363, 0xbe56585d, 0x68f38238, 0x8731e906, + 0x6c065205, 0x83c4393b, 0x61182242, 0x8eda497c, 0x65edf27f, + 0x8a2f9941, 0x7b24c2cc, 0x94e6a9f2, 0x7fd112f1, 0x901379cf, + 0x72cf62b6, 0x9d0d0988, 0x763ab28b, 0x99f8d9b5, 0x9eba07a0, + 0x71786c9e, 0x9a4fd79d, 0x758dbca3, 0x9751a7da, 0x7893cce4, + 0x93a477e7, 0x7c661cd9, 0x8d6d4754, 0x62af2c6a, 0x89989769, + 0x665afc57, 0x8486e72e, 0x6b448c10, 0x80733713, 0x6fb15c2d, + 0xb9148648, 0x56d6ed76, 0xbde15675, 0x52233d4b, 0xb0ff2632, + 0x5f3d4d0c, 0xb40af60f, 0x5bc89d31, 0xaac3c6bc, 0x4501ad82, + 0xae361681, 0x41f47dbf, 0xa32866c6, 0x4cea0df8, 0xa7ddb6fb, + 0x481fddc5, 0xd1e70470, 0x3e256f4e, 0xd512d44d, 0x3ad0bf73, + 0xd80ca40a, 0x37cecf34, 0xdcf97437, 0x333b1f09, 0xc2304484, + 0x2df22fba, 0xc6c594b9, 0x2907ff87, 0xcbdbe4fe, 0x24198fc0, + 0xcf2e34c3, 0x20ec5ffd, 0xf6498598, 0x198beea6, 0xf2bc55a5, + 0x1d7e3e9b, 0xffa225e2, 0x10604edc, 0xfb57f5df, 0x14959ee1, + 0xe59ec56c, 0x0a5cae52, 0xe16b1551, 0x0ea97e6f, 0xec756516, + 0x03b70e28, 0xe880b52b, 0x0742de15, 0xe6050901, 0x09c7623f, + 0xe2f0d93c, 0x0d32b202, 0xefeea97b, 0x002cc245, 0xeb1b7946, + 0x04d91278, 0xf5d249f5, 0x1a1022cb, 
0xf12799c8, 0x1ee5f2f6, + 0xfc39e98f, 0x13fb82b1, 0xf8cc39b2, 0x170e528c, 0xc1ab88e9, + 0x2e69e3d7, 0xc55e58d4, 0x2a9c33ea, 0xc8402893, 0x278243ad, + 0xccb5f8ae, 0x23779390, 0xd27cc81d, 0x3dbea323, 0xd6891820, + 0x394b731e, 0xdb976867, 0x34550359, 0xdf62b85a, 0x30a0d364, + 0xa9580ad1, 0x469a61ef, 0xadaddaec, 0x426fb1d2, 0xa0b3aaab, + 0x4f71c195, 0xa4467a96, 0x4b8411a8, 0xba8f4a25, 0x554d211b, + 0xbe7a9a18, 0x51b8f126, 0xb364ea5f, 0x5ca68161, 0xb7913a62, + 0x5853515c, 0x8ef68b39, 0x6134e007, 0x8a035b04, 0x65c1303a, + 0x871d2b43, 0x68df407d, 0x83e8fb7e, 0x6c2a9040, 0x9d21cbcd, + 0x72e3a0f3, 0x99d41bf0, 0x761670ce, 0x94ca6bb7, 0x7b080089, + 0x903fbb8a, 0x7ffdd0b4, 0x78bf0ea1, 0x977d659f, 0x7c4ade9c, + 0x9388b5a2, 0x7154aedb, 0x9e96c5e5, 0x75a17ee6, 0x9a6315d8, + 0x6b684e55, 0x84aa256b, 0x6f9d9e68, 0x805ff556, 0x6283ee2f, + 0x8d418511, 0x66763e12, 0x89b4552c, 0x5f118f49, 0xb0d3e477, + 0x5be45f74, 0xb426344a, 0x56fa2f33, 0xb938440d, 0x520fff0e, + 0xbdcd9430, 0x4cc6cfbd, 0xa304a483, 0x48331f80, 0xa7f174be, + 0x452d6fc7, 0xaaef04f9, 0x41d8bffa, 0xae1ad4c4, 0x37e20d71, + 0xd820664f, 0x3317dd4c, 0xdcd5b672, 0x3e09ad0b, 0xd1cbc635, + 0x3afc7d36, 0xd53e1608, 0x24354d85, 0xcbf726bb, 0x20c09db8, + 0xcf02f686, 0x2ddeedff, 0xc21c86c1, 0x292b3dc2, 0xc6e956fc, + 0x104c8c99, 0xff8ee7a7, 0x14b95ca4, 0xfb7b379a, 0x19a72ce3, + 0xf66547dd, 0x1d52fcde, 0xf29097e0, 0x039bcc6d, 0xec59a753, + 0x076e1c50, 0xe8ac776e, 0x0a706c17, 0xe5b20729, 0x0e85bc2a, + 0xe147d714}, + {0x00000000, 0x177b1443, 0x2ef62886, 0x398d3cc5, 0x5dec510c, + 0x4a97454f, 0x731a798a, 0x64616dc9, 0xbbd8a218, 0xaca3b65b, + 0x952e8a9e, 0x82559edd, 0xe634f314, 0xf14fe757, 0xc8c2db92, + 0xdfb9cfd1, 0xacc04271, 0xbbbb5632, 0x82366af7, 0x954d7eb4, + 0xf12c137d, 0xe657073e, 0xdfda3bfb, 0xc8a12fb8, 0x1718e069, + 0x0063f42a, 0x39eec8ef, 0x2e95dcac, 0x4af4b165, 0x5d8fa526, + 0x640299e3, 0x73798da0, 0x82f182a3, 0x958a96e0, 0xac07aa25, + 0xbb7cbe66, 0xdf1dd3af, 0xc866c7ec, 0xf1ebfb29, 0xe690ef6a, + 0x392920bb, 0x2e5234f8, 0x17df083d, 0x00a41c7e, 0x64c571b7, + 0x73be65f4, 0x4a335931, 0x5d484d72, 0x2e31c0d2, 0x394ad491, + 0x00c7e854, 0x17bcfc17, 0x73dd91de, 0x64a6859d, 0x5d2bb958, + 0x4a50ad1b, 0x95e962ca, 0x82927689, 0xbb1f4a4c, 0xac645e0f, + 0xc80533c6, 0xdf7e2785, 0xe6f31b40, 0xf1880f03, 0xde920307, + 0xc9e91744, 0xf0642b81, 0xe71f3fc2, 0x837e520b, 0x94054648, + 0xad887a8d, 0xbaf36ece, 0x654aa11f, 0x7231b55c, 0x4bbc8999, + 0x5cc79dda, 0x38a6f013, 0x2fdde450, 0x1650d895, 0x012bccd6, + 0x72524176, 0x65295535, 0x5ca469f0, 0x4bdf7db3, 0x2fbe107a, + 0x38c50439, 0x014838fc, 0x16332cbf, 0xc98ae36e, 0xdef1f72d, + 0xe77ccbe8, 0xf007dfab, 0x9466b262, 0x831da621, 0xba909ae4, + 0xadeb8ea7, 0x5c6381a4, 0x4b1895e7, 0x7295a922, 0x65eebd61, + 0x018fd0a8, 0x16f4c4eb, 0x2f79f82e, 0x3802ec6d, 0xe7bb23bc, + 0xf0c037ff, 0xc94d0b3a, 0xde361f79, 0xba5772b0, 0xad2c66f3, + 0x94a15a36, 0x83da4e75, 0xf0a3c3d5, 0xe7d8d796, 0xde55eb53, + 0xc92eff10, 0xad4f92d9, 0xba34869a, 0x83b9ba5f, 0x94c2ae1c, + 0x4b7b61cd, 0x5c00758e, 0x658d494b, 0x72f65d08, 0x169730c1, + 0x01ec2482, 0x38611847, 0x2f1a0c04, 0x6655004f, 0x712e140c, + 0x48a328c9, 0x5fd83c8a, 0x3bb95143, 0x2cc24500, 0x154f79c5, + 0x02346d86, 0xdd8da257, 0xcaf6b614, 0xf37b8ad1, 0xe4009e92, + 0x8061f35b, 0x971ae718, 0xae97dbdd, 0xb9eccf9e, 0xca95423e, + 0xddee567d, 0xe4636ab8, 0xf3187efb, 0x97791332, 0x80020771, + 0xb98f3bb4, 0xaef42ff7, 0x714de026, 0x6636f465, 0x5fbbc8a0, + 0x48c0dce3, 0x2ca1b12a, 0x3bdaa569, 0x025799ac, 0x152c8def, + 0xe4a482ec, 0xf3df96af, 0xca52aa6a, 0xdd29be29, 0xb948d3e0, + 0xae33c7a3, 0x97befb66, 0x80c5ef25, 
0x5f7c20f4, 0x480734b7, + 0x718a0872, 0x66f11c31, 0x029071f8, 0x15eb65bb, 0x2c66597e, + 0x3b1d4d3d, 0x4864c09d, 0x5f1fd4de, 0x6692e81b, 0x71e9fc58, + 0x15889191, 0x02f385d2, 0x3b7eb917, 0x2c05ad54, 0xf3bc6285, + 0xe4c776c6, 0xdd4a4a03, 0xca315e40, 0xae503389, 0xb92b27ca, + 0x80a61b0f, 0x97dd0f4c, 0xb8c70348, 0xafbc170b, 0x96312bce, + 0x814a3f8d, 0xe52b5244, 0xf2504607, 0xcbdd7ac2, 0xdca66e81, + 0x031fa150, 0x1464b513, 0x2de989d6, 0x3a929d95, 0x5ef3f05c, + 0x4988e41f, 0x7005d8da, 0x677ecc99, 0x14074139, 0x037c557a, + 0x3af169bf, 0x2d8a7dfc, 0x49eb1035, 0x5e900476, 0x671d38b3, + 0x70662cf0, 0xafdfe321, 0xb8a4f762, 0x8129cba7, 0x9652dfe4, + 0xf233b22d, 0xe548a66e, 0xdcc59aab, 0xcbbe8ee8, 0x3a3681eb, + 0x2d4d95a8, 0x14c0a96d, 0x03bbbd2e, 0x67dad0e7, 0x70a1c4a4, + 0x492cf861, 0x5e57ec22, 0x81ee23f3, 0x969537b0, 0xaf180b75, + 0xb8631f36, 0xdc0272ff, 0xcb7966bc, 0xf2f45a79, 0xe58f4e3a, + 0x96f6c39a, 0x818dd7d9, 0xb800eb1c, 0xaf7bff5f, 0xcb1a9296, + 0xdc6186d5, 0xe5ecba10, 0xf297ae53, 0x2d2e6182, 0x3a5575c1, + 0x03d84904, 0x14a35d47, 0x70c2308e, 0x67b924cd, 0x5e341808, + 0x494f0c4b}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43147b17, 0x8628f62e, 0xc53c8d39, 0x0c51ec5d, + 0x4f45974a, 0x8a791a73, 0xc96d6164, 0x18a2d8bb, 0x5bb6a3ac, + 0x9e8a2e95, 0xdd9e5582, 0x14f334e6, 0x57e74ff1, 0x92dbc2c8, + 0xd1cfb9df, 0x7142c0ac, 0x3256bbbb, 0xf76a3682, 0xb47e4d95, + 0x7d132cf1, 0x3e0757e6, 0xfb3bdadf, 0xb82fa1c8, 0x69e01817, + 0x2af46300, 0xefc8ee39, 0xacdc952e, 0x65b1f44a, 0x26a58f5d, + 0xe3990264, 0xa08d7973, 0xa382f182, 0xe0968a95, 0x25aa07ac, + 0x66be7cbb, 0xafd31ddf, 0xecc766c8, 0x29fbebf1, 0x6aef90e6, + 0xbb202939, 0xf834522e, 0x3d08df17, 0x7e1ca400, 0xb771c564, + 0xf465be73, 0x3159334a, 0x724d485d, 0xd2c0312e, 0x91d44a39, + 0x54e8c700, 0x17fcbc17, 0xde91dd73, 0x9d85a664, 0x58b92b5d, + 0x1bad504a, 0xca62e995, 0x89769282, 0x4c4a1fbb, 0x0f5e64ac, + 0xc63305c8, 0x85277edf, 0x401bf3e6, 0x030f88f1, 0x070392de, + 0x4417e9c9, 0x812b64f0, 0xc23f1fe7, 0x0b527e83, 0x48460594, + 0x8d7a88ad, 0xce6ef3ba, 0x1fa14a65, 0x5cb53172, 0x9989bc4b, + 0xda9dc75c, 0x13f0a638, 0x50e4dd2f, 0x95d85016, 0xd6cc2b01, + 0x76415272, 0x35552965, 0xf069a45c, 0xb37ddf4b, 0x7a10be2f, + 0x3904c538, 0xfc384801, 0xbf2c3316, 0x6ee38ac9, 0x2df7f1de, + 0xe8cb7ce7, 0xabdf07f0, 0x62b26694, 0x21a61d83, 0xe49a90ba, + 0xa78eebad, 0xa481635c, 0xe795184b, 0x22a99572, 0x61bdee65, + 0xa8d08f01, 0xebc4f416, 0x2ef8792f, 0x6dec0238, 0xbc23bbe7, + 0xff37c0f0, 0x3a0b4dc9, 0x791f36de, 0xb07257ba, 0xf3662cad, + 0x365aa194, 0x754eda83, 0xd5c3a3f0, 0x96d7d8e7, 0x53eb55de, + 0x10ff2ec9, 0xd9924fad, 0x9a8634ba, 0x5fbab983, 0x1caec294, + 0xcd617b4b, 0x8e75005c, 0x4b498d65, 0x085df672, 0xc1309716, + 0x8224ec01, 0x47186138, 0x040c1a2f, 0x4f005566, 0x0c142e71, + 0xc928a348, 0x8a3cd85f, 0x4351b93b, 0x0045c22c, 0xc5794f15, + 0x866d3402, 0x57a28ddd, 0x14b6f6ca, 0xd18a7bf3, 0x929e00e4, + 0x5bf36180, 0x18e71a97, 0xdddb97ae, 0x9ecfecb9, 0x3e4295ca, + 0x7d56eedd, 0xb86a63e4, 0xfb7e18f3, 0x32137997, 0x71070280, + 0xb43b8fb9, 0xf72ff4ae, 0x26e04d71, 0x65f43666, 0xa0c8bb5f, + 0xe3dcc048, 0x2ab1a12c, 0x69a5da3b, 0xac995702, 0xef8d2c15, + 0xec82a4e4, 0xaf96dff3, 0x6aaa52ca, 0x29be29dd, 0xe0d348b9, + 0xa3c733ae, 0x66fbbe97, 0x25efc580, 0xf4207c5f, 0xb7340748, + 0x72088a71, 0x311cf166, 0xf8719002, 0xbb65eb15, 0x7e59662c, + 0x3d4d1d3b, 0x9dc06448, 0xded41f5f, 0x1be89266, 0x58fce971, + 0x91918815, 0xd285f302, 0x17b97e3b, 0x54ad052c, 0x8562bcf3, + 0xc676c7e4, 0x034a4add, 0x405e31ca, 0x893350ae, 0xca272bb9, + 0x0f1ba680, 0x4c0fdd97, 0x4803c7b8, 
0x0b17bcaf, 0xce2b3196, + 0x8d3f4a81, 0x44522be5, 0x074650f2, 0xc27addcb, 0x816ea6dc, + 0x50a11f03, 0x13b56414, 0xd689e92d, 0x959d923a, 0x5cf0f35e, + 0x1fe48849, 0xdad80570, 0x99cc7e67, 0x39410714, 0x7a557c03, + 0xbf69f13a, 0xfc7d8a2d, 0x3510eb49, 0x7604905e, 0xb3381d67, + 0xf02c6670, 0x21e3dfaf, 0x62f7a4b8, 0xa7cb2981, 0xe4df5296, + 0x2db233f2, 0x6ea648e5, 0xab9ac5dc, 0xe88ebecb, 0xeb81363a, + 0xa8954d2d, 0x6da9c014, 0x2ebdbb03, 0xe7d0da67, 0xa4c4a170, + 0x61f82c49, 0x22ec575e, 0xf323ee81, 0xb0379596, 0x750b18af, + 0x361f63b8, 0xff7202dc, 0xbc6679cb, 0x795af4f2, 0x3a4e8fe5, + 0x9ac3f696, 0xd9d78d81, 0x1ceb00b8, 0x5fff7baf, 0x96921acb, + 0xd58661dc, 0x10baece5, 0x53ae97f2, 0x82612e2d, 0xc175553a, + 0x0449d803, 0x475da314, 0x8e30c270, 0xcd24b967, 0x0818345e, + 0x4b0c4f49}, + {0x00000000, 0x3e6bc2ef, 0x3dd0f504, 0x03bb37eb, 0x7aa0eb09, + 0x44cb29e6, 0x47701e0d, 0x791bdce2, 0xf440d713, 0xca2b15fc, + 0xc9902217, 0xf7fbe0f8, 0x8ee03c1a, 0xb08bfef5, 0xb330c91e, + 0x8d5b0bf1, 0xe881ae27, 0xd6ea6cc8, 0xd5515b23, 0xeb3a99cc, + 0x9221452e, 0xac4a87c1, 0xaff1b02a, 0x919a72c5, 0x1cc17934, + 0x22aabbdb, 0x21118c30, 0x1f7a4edf, 0x6661923d, 0x580a50d2, + 0x5bb16739, 0x65daa5d6, 0xd0035d4f, 0xee689fa0, 0xedd3a84b, + 0xd3b86aa4, 0xaaa3b646, 0x94c874a9, 0x97734342, 0xa91881ad, + 0x24438a5c, 0x1a2848b3, 0x19937f58, 0x27f8bdb7, 0x5ee36155, + 0x6088a3ba, 0x63339451, 0x5d5856be, 0x3882f368, 0x06e93187, + 0x0552066c, 0x3b39c483, 0x42221861, 0x7c49da8e, 0x7ff2ed65, + 0x41992f8a, 0xccc2247b, 0xf2a9e694, 0xf112d17f, 0xcf791390, + 0xb662cf72, 0x88090d9d, 0x8bb23a76, 0xb5d9f899, 0xa007ba9e, + 0x9e6c7871, 0x9dd74f9a, 0xa3bc8d75, 0xdaa75197, 0xe4cc9378, + 0xe777a493, 0xd91c667c, 0x54476d8d, 0x6a2caf62, 0x69979889, + 0x57fc5a66, 0x2ee78684, 0x108c446b, 0x13377380, 0x2d5cb16f, + 0x488614b9, 0x76edd656, 0x7556e1bd, 0x4b3d2352, 0x3226ffb0, + 0x0c4d3d5f, 0x0ff60ab4, 0x319dc85b, 0xbcc6c3aa, 0x82ad0145, + 0x811636ae, 0xbf7df441, 0xc66628a3, 0xf80dea4c, 0xfbb6dda7, + 0xc5dd1f48, 0x7004e7d1, 0x4e6f253e, 0x4dd412d5, 0x73bfd03a, + 0x0aa40cd8, 0x34cfce37, 0x3774f9dc, 0x091f3b33, 0x844430c2, + 0xba2ff22d, 0xb994c5c6, 0x87ff0729, 0xfee4dbcb, 0xc08f1924, + 0xc3342ecf, 0xfd5fec20, 0x988549f6, 0xa6ee8b19, 0xa555bcf2, + 0x9b3e7e1d, 0xe225a2ff, 0xdc4e6010, 0xdff557fb, 0xe19e9514, + 0x6cc59ee5, 0x52ae5c0a, 0x51156be1, 0x6f7ea90e, 0x166575ec, + 0x280eb703, 0x2bb580e8, 0x15de4207, 0x010905e6, 0x3f62c709, + 0x3cd9f0e2, 0x02b2320d, 0x7ba9eeef, 0x45c22c00, 0x46791beb, + 0x7812d904, 0xf549d2f5, 0xcb22101a, 0xc89927f1, 0xf6f2e51e, + 0x8fe939fc, 0xb182fb13, 0xb239ccf8, 0x8c520e17, 0xe988abc1, + 0xd7e3692e, 0xd4585ec5, 0xea339c2a, 0x932840c8, 0xad438227, + 0xaef8b5cc, 0x90937723, 0x1dc87cd2, 0x23a3be3d, 0x201889d6, + 0x1e734b39, 0x676897db, 0x59035534, 0x5ab862df, 0x64d3a030, + 0xd10a58a9, 0xef619a46, 0xecdaadad, 0xd2b16f42, 0xabaab3a0, + 0x95c1714f, 0x967a46a4, 0xa811844b, 0x254a8fba, 0x1b214d55, + 0x189a7abe, 0x26f1b851, 0x5fea64b3, 0x6181a65c, 0x623a91b7, + 0x5c515358, 0x398bf68e, 0x07e03461, 0x045b038a, 0x3a30c165, + 0x432b1d87, 0x7d40df68, 0x7efbe883, 0x40902a6c, 0xcdcb219d, + 0xf3a0e372, 0xf01bd499, 0xce701676, 0xb76bca94, 0x8900087b, + 0x8abb3f90, 0xb4d0fd7f, 0xa10ebf78, 0x9f657d97, 0x9cde4a7c, + 0xa2b58893, 0xdbae5471, 0xe5c5969e, 0xe67ea175, 0xd815639a, + 0x554e686b, 0x6b25aa84, 0x689e9d6f, 0x56f55f80, 0x2fee8362, + 0x1185418d, 0x123e7666, 0x2c55b489, 0x498f115f, 0x77e4d3b0, + 0x745fe45b, 0x4a3426b4, 0x332ffa56, 0x0d4438b9, 0x0eff0f52, + 0x3094cdbd, 0xbdcfc64c, 0x83a404a3, 0x801f3348, 0xbe74f1a7, + 0xc76f2d45, 0xf904efaa, 0xfabfd841, 
0xc4d41aae, 0x710de237, + 0x4f6620d8, 0x4cdd1733, 0x72b6d5dc, 0x0bad093e, 0x35c6cbd1, + 0x367dfc3a, 0x08163ed5, 0x854d3524, 0xbb26f7cb, 0xb89dc020, + 0x86f602cf, 0xffedde2d, 0xc1861cc2, 0xc23d2b29, 0xfc56e9c6, + 0x998c4c10, 0xa7e78eff, 0xa45cb914, 0x9a377bfb, 0xe32ca719, + 0xdd4765f6, 0xdefc521d, 0xe09790f2, 0x6dcc9b03, 0x53a759ec, + 0x501c6e07, 0x6e77ace8, 0x176c700a, 0x2907b2e5, 0x2abc850e, + 0x14d747e1}, + {0x00000000, 0xc0df8ec1, 0xc1b96c58, 0x0166e299, 0x8273d9b0, + 0x42ac5771, 0x43cab5e8, 0x83153b29, 0x45e1c3ba, 0x853e4d7b, + 0x8458afe2, 0x44872123, 0xc7921a0a, 0x074d94cb, 0x062b7652, + 0xc6f4f893, 0xcbc4f6ae, 0x0b1b786f, 0x0a7d9af6, 0xcaa21437, + 0x49b72f1e, 0x8968a1df, 0x880e4346, 0x48d1cd87, 0x8e253514, + 0x4efabbd5, 0x4f9c594c, 0x8f43d78d, 0x0c56eca4, 0xcc896265, + 0xcdef80fc, 0x0d300e3d, 0xd78f9c86, 0x17501247, 0x1636f0de, + 0xd6e97e1f, 0x55fc4536, 0x9523cbf7, 0x9445296e, 0x549aa7af, + 0x926e5f3c, 0x52b1d1fd, 0x53d73364, 0x9308bda5, 0x101d868c, + 0xd0c2084d, 0xd1a4ead4, 0x117b6415, 0x1c4b6a28, 0xdc94e4e9, + 0xddf20670, 0x1d2d88b1, 0x9e38b398, 0x5ee73d59, 0x5f81dfc0, + 0x9f5e5101, 0x59aaa992, 0x99752753, 0x9813c5ca, 0x58cc4b0b, + 0xdbd97022, 0x1b06fee3, 0x1a601c7a, 0xdabf92bb, 0xef1948d6, + 0x2fc6c617, 0x2ea0248e, 0xee7faa4f, 0x6d6a9166, 0xadb51fa7, + 0xacd3fd3e, 0x6c0c73ff, 0xaaf88b6c, 0x6a2705ad, 0x6b41e734, + 0xab9e69f5, 0x288b52dc, 0xe854dc1d, 0xe9323e84, 0x29edb045, + 0x24ddbe78, 0xe40230b9, 0xe564d220, 0x25bb5ce1, 0xa6ae67c8, + 0x6671e909, 0x67170b90, 0xa7c88551, 0x613c7dc2, 0xa1e3f303, + 0xa085119a, 0x605a9f5b, 0xe34fa472, 0x23902ab3, 0x22f6c82a, + 0xe22946eb, 0x3896d450, 0xf8495a91, 0xf92fb808, 0x39f036c9, + 0xbae50de0, 0x7a3a8321, 0x7b5c61b8, 0xbb83ef79, 0x7d7717ea, + 0xbda8992b, 0xbcce7bb2, 0x7c11f573, 0xff04ce5a, 0x3fdb409b, + 0x3ebda202, 0xfe622cc3, 0xf35222fe, 0x338dac3f, 0x32eb4ea6, + 0xf234c067, 0x7121fb4e, 0xb1fe758f, 0xb0989716, 0x704719d7, + 0xb6b3e144, 0x766c6f85, 0x770a8d1c, 0xb7d503dd, 0x34c038f4, + 0xf41fb635, 0xf57954ac, 0x35a6da6d, 0x9f35e177, 0x5fea6fb6, + 0x5e8c8d2f, 0x9e5303ee, 0x1d4638c7, 0xdd99b606, 0xdcff549f, + 0x1c20da5e, 0xdad422cd, 0x1a0bac0c, 0x1b6d4e95, 0xdbb2c054, + 0x58a7fb7d, 0x987875bc, 0x991e9725, 0x59c119e4, 0x54f117d9, + 0x942e9918, 0x95487b81, 0x5597f540, 0xd682ce69, 0x165d40a8, + 0x173ba231, 0xd7e42cf0, 0x1110d463, 0xd1cf5aa2, 0xd0a9b83b, + 0x107636fa, 0x93630dd3, 0x53bc8312, 0x52da618b, 0x9205ef4a, + 0x48ba7df1, 0x8865f330, 0x890311a9, 0x49dc9f68, 0xcac9a441, + 0x0a162a80, 0x0b70c819, 0xcbaf46d8, 0x0d5bbe4b, 0xcd84308a, + 0xcce2d213, 0x0c3d5cd2, 0x8f2867fb, 0x4ff7e93a, 0x4e910ba3, + 0x8e4e8562, 0x837e8b5f, 0x43a1059e, 0x42c7e707, 0x821869c6, + 0x010d52ef, 0xc1d2dc2e, 0xc0b43eb7, 0x006bb076, 0xc69f48e5, + 0x0640c624, 0x072624bd, 0xc7f9aa7c, 0x44ec9155, 0x84331f94, + 0x8555fd0d, 0x458a73cc, 0x702ca9a1, 0xb0f32760, 0xb195c5f9, + 0x714a4b38, 0xf25f7011, 0x3280fed0, 0x33e61c49, 0xf3399288, + 0x35cd6a1b, 0xf512e4da, 0xf4740643, 0x34ab8882, 0xb7beb3ab, + 0x77613d6a, 0x7607dff3, 0xb6d85132, 0xbbe85f0f, 0x7b37d1ce, + 0x7a513357, 0xba8ebd96, 0x399b86bf, 0xf944087e, 0xf822eae7, + 0x38fd6426, 0xfe099cb5, 0x3ed61274, 0x3fb0f0ed, 0xff6f7e2c, + 0x7c7a4505, 0xbca5cbc4, 0xbdc3295d, 0x7d1ca79c, 0xa7a33527, + 0x677cbbe6, 0x661a597f, 0xa6c5d7be, 0x25d0ec97, 0xe50f6256, + 0xe46980cf, 0x24b60e0e, 0xe242f69d, 0x229d785c, 0x23fb9ac5, + 0xe3241404, 0x60312f2d, 0xa0eea1ec, 0xa1884375, 0x6157cdb4, + 0x6c67c389, 0xacb84d48, 0xaddeafd1, 0x6d012110, 0xee141a39, + 0x2ecb94f8, 0x2fad7661, 0xef72f8a0, 0x29860033, 0xe9598ef2, + 0xe83f6c6b, 0x28e0e2aa, 0xabf5d983, 
0x6b2a5742, 0x6a4cb5db, + 0xaa933b1a}, + {0x00000000, 0x6f4ca59b, 0x9f9e3bec, 0xf0d29e77, 0x7f3b0603, + 0x1077a398, 0xe0a53def, 0x8fe99874, 0xfe760c06, 0x913aa99d, + 0x61e837ea, 0x0ea49271, 0x814d0a05, 0xee01af9e, 0x1ed331e9, + 0x719f9472, 0xfced180c, 0x93a1bd97, 0x637323e0, 0x0c3f867b, + 0x83d61e0f, 0xec9abb94, 0x1c4825e3, 0x73048078, 0x029b140a, + 0x6dd7b191, 0x9d052fe6, 0xf2498a7d, 0x7da01209, 0x12ecb792, + 0xe23e29e5, 0x8d728c7e, 0xf8db3118, 0x97979483, 0x67450af4, + 0x0809af6f, 0x87e0371b, 0xe8ac9280, 0x187e0cf7, 0x7732a96c, + 0x06ad3d1e, 0x69e19885, 0x993306f2, 0xf67fa369, 0x79963b1d, + 0x16da9e86, 0xe60800f1, 0x8944a56a, 0x04362914, 0x6b7a8c8f, + 0x9ba812f8, 0xf4e4b763, 0x7b0d2f17, 0x14418a8c, 0xe49314fb, + 0x8bdfb160, 0xfa402512, 0x950c8089, 0x65de1efe, 0x0a92bb65, + 0x857b2311, 0xea37868a, 0x1ae518fd, 0x75a9bd66, 0xf0b76330, + 0x9ffbc6ab, 0x6f2958dc, 0x0065fd47, 0x8f8c6533, 0xe0c0c0a8, + 0x10125edf, 0x7f5efb44, 0x0ec16f36, 0x618dcaad, 0x915f54da, + 0xfe13f141, 0x71fa6935, 0x1eb6ccae, 0xee6452d9, 0x8128f742, + 0x0c5a7b3c, 0x6316dea7, 0x93c440d0, 0xfc88e54b, 0x73617d3f, + 0x1c2dd8a4, 0xecff46d3, 0x83b3e348, 0xf22c773a, 0x9d60d2a1, + 0x6db24cd6, 0x02fee94d, 0x8d177139, 0xe25bd4a2, 0x12894ad5, + 0x7dc5ef4e, 0x086c5228, 0x6720f7b3, 0x97f269c4, 0xf8becc5f, + 0x7757542b, 0x181bf1b0, 0xe8c96fc7, 0x8785ca5c, 0xf61a5e2e, + 0x9956fbb5, 0x698465c2, 0x06c8c059, 0x8921582d, 0xe66dfdb6, + 0x16bf63c1, 0x79f3c65a, 0xf4814a24, 0x9bcdefbf, 0x6b1f71c8, + 0x0453d453, 0x8bba4c27, 0xe4f6e9bc, 0x142477cb, 0x7b68d250, + 0x0af74622, 0x65bbe3b9, 0x95697dce, 0xfa25d855, 0x75cc4021, + 0x1a80e5ba, 0xea527bcd, 0x851ede56, 0xe06fc760, 0x8f2362fb, + 0x7ff1fc8c, 0x10bd5917, 0x9f54c163, 0xf01864f8, 0x00cafa8f, + 0x6f865f14, 0x1e19cb66, 0x71556efd, 0x8187f08a, 0xeecb5511, + 0x6122cd65, 0x0e6e68fe, 0xfebcf689, 0x91f05312, 0x1c82df6c, + 0x73ce7af7, 0x831ce480, 0xec50411b, 0x63b9d96f, 0x0cf57cf4, + 0xfc27e283, 0x936b4718, 0xe2f4d36a, 0x8db876f1, 0x7d6ae886, + 0x12264d1d, 0x9dcfd569, 0xf28370f2, 0x0251ee85, 0x6d1d4b1e, + 0x18b4f678, 0x77f853e3, 0x872acd94, 0xe866680f, 0x678ff07b, + 0x08c355e0, 0xf811cb97, 0x975d6e0c, 0xe6c2fa7e, 0x898e5fe5, + 0x795cc192, 0x16106409, 0x99f9fc7d, 0xf6b559e6, 0x0667c791, + 0x692b620a, 0xe459ee74, 0x8b154bef, 0x7bc7d598, 0x148b7003, + 0x9b62e877, 0xf42e4dec, 0x04fcd39b, 0x6bb07600, 0x1a2fe272, + 0x756347e9, 0x85b1d99e, 0xeafd7c05, 0x6514e471, 0x0a5841ea, + 0xfa8adf9d, 0x95c67a06, 0x10d8a450, 0x7f9401cb, 0x8f469fbc, + 0xe00a3a27, 0x6fe3a253, 0x00af07c8, 0xf07d99bf, 0x9f313c24, + 0xeeaea856, 0x81e20dcd, 0x713093ba, 0x1e7c3621, 0x9195ae55, + 0xfed90bce, 0x0e0b95b9, 0x61473022, 0xec35bc5c, 0x837919c7, + 0x73ab87b0, 0x1ce7222b, 0x930eba5f, 0xfc421fc4, 0x0c9081b3, + 0x63dc2428, 0x1243b05a, 0x7d0f15c1, 0x8ddd8bb6, 0xe2912e2d, + 0x6d78b659, 0x023413c2, 0xf2e68db5, 0x9daa282e, 0xe8039548, + 0x874f30d3, 0x779daea4, 0x18d10b3f, 0x9738934b, 0xf87436d0, + 0x08a6a8a7, 0x67ea0d3c, 0x1675994e, 0x79393cd5, 0x89eba2a2, + 0xe6a70739, 0x694e9f4d, 0x06023ad6, 0xf6d0a4a1, 0x999c013a, + 0x14ee8d44, 0x7ba228df, 0x8b70b6a8, 0xe43c1333, 0x6bd58b47, + 0x04992edc, 0xf44bb0ab, 0x9b071530, 0xea988142, 0x85d424d9, + 0x7506baae, 0x1a4a1f35, 0x95a38741, 0xfaef22da, 0x0a3dbcad, + 0x65711936}}; + +#endif + +#endif + +#if N == 4 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xf1da05aa, 0x38c50d15, 0xc91f08bf, 0x718a1a2a, + 0x80501f80, 0x494f173f, 0xb8951295, 0xe3143454, 0x12ce31fe, + 0xdbd13941, 0x2a0b3ceb, 0x929e2e7e, 0x63442bd4, 0xaa5b236b, + 0x5b8126c1, 0x1d596ee9, 0xec836b43, 
0x259c63fc, 0xd4466656, + 0x6cd374c3, 0x9d097169, 0x541679d6, 0xa5cc7c7c, 0xfe4d5abd, + 0x0f975f17, 0xc68857a8, 0x37525202, 0x8fc74097, 0x7e1d453d, + 0xb7024d82, 0x46d84828, 0x3ab2ddd2, 0xcb68d878, 0x0277d0c7, + 0xf3add56d, 0x4b38c7f8, 0xbae2c252, 0x73fdcaed, 0x8227cf47, + 0xd9a6e986, 0x287cec2c, 0xe163e493, 0x10b9e139, 0xa82cf3ac, + 0x59f6f606, 0x90e9feb9, 0x6133fb13, 0x27ebb33b, 0xd631b691, + 0x1f2ebe2e, 0xeef4bb84, 0x5661a911, 0xa7bbacbb, 0x6ea4a404, + 0x9f7ea1ae, 0xc4ff876f, 0x352582c5, 0xfc3a8a7a, 0x0de08fd0, + 0xb5759d45, 0x44af98ef, 0x8db09050, 0x7c6a95fa, 0x7565bba4, + 0x84bfbe0e, 0x4da0b6b1, 0xbc7ab31b, 0x04efa18e, 0xf535a424, + 0x3c2aac9b, 0xcdf0a931, 0x96718ff0, 0x67ab8a5a, 0xaeb482e5, + 0x5f6e874f, 0xe7fb95da, 0x16219070, 0xdf3e98cf, 0x2ee49d65, + 0x683cd54d, 0x99e6d0e7, 0x50f9d858, 0xa123ddf2, 0x19b6cf67, + 0xe86ccacd, 0x2173c272, 0xd0a9c7d8, 0x8b28e119, 0x7af2e4b3, + 0xb3edec0c, 0x4237e9a6, 0xfaa2fb33, 0x0b78fe99, 0xc267f626, + 0x33bdf38c, 0x4fd76676, 0xbe0d63dc, 0x77126b63, 0x86c86ec9, + 0x3e5d7c5c, 0xcf8779f6, 0x06987149, 0xf74274e3, 0xacc35222, + 0x5d195788, 0x94065f37, 0x65dc5a9d, 0xdd494808, 0x2c934da2, + 0xe58c451d, 0x145640b7, 0x528e089f, 0xa3540d35, 0x6a4b058a, + 0x9b910020, 0x230412b5, 0xd2de171f, 0x1bc11fa0, 0xea1b1a0a, + 0xb19a3ccb, 0x40403961, 0x895f31de, 0x78853474, 0xc01026e1, + 0x31ca234b, 0xf8d52bf4, 0x090f2e5e, 0xeacb7748, 0x1b1172e2, + 0xd20e7a5d, 0x23d47ff7, 0x9b416d62, 0x6a9b68c8, 0xa3846077, + 0x525e65dd, 0x09df431c, 0xf80546b6, 0x311a4e09, 0xc0c04ba3, + 0x78555936, 0x898f5c9c, 0x40905423, 0xb14a5189, 0xf79219a1, + 0x06481c0b, 0xcf5714b4, 0x3e8d111e, 0x8618038b, 0x77c20621, + 0xbedd0e9e, 0x4f070b34, 0x14862df5, 0xe55c285f, 0x2c4320e0, + 0xdd99254a, 0x650c37df, 0x94d63275, 0x5dc93aca, 0xac133f60, + 0xd079aa9a, 0x21a3af30, 0xe8bca78f, 0x1966a225, 0xa1f3b0b0, + 0x5029b51a, 0x9936bda5, 0x68ecb80f, 0x336d9ece, 0xc2b79b64, + 0x0ba893db, 0xfa729671, 0x42e784e4, 0xb33d814e, 0x7a2289f1, + 0x8bf88c5b, 0xcd20c473, 0x3cfac1d9, 0xf5e5c966, 0x043fcccc, + 0xbcaade59, 0x4d70dbf3, 0x846fd34c, 0x75b5d6e6, 0x2e34f027, + 0xdfeef58d, 0x16f1fd32, 0xe72bf898, 0x5fbeea0d, 0xae64efa7, + 0x677be718, 0x96a1e2b2, 0x9faeccec, 0x6e74c946, 0xa76bc1f9, + 0x56b1c453, 0xee24d6c6, 0x1ffed36c, 0xd6e1dbd3, 0x273bde79, + 0x7cbaf8b8, 0x8d60fd12, 0x447ff5ad, 0xb5a5f007, 0x0d30e292, + 0xfceae738, 0x35f5ef87, 0xc42fea2d, 0x82f7a205, 0x732da7af, + 0xba32af10, 0x4be8aaba, 0xf37db82f, 0x02a7bd85, 0xcbb8b53a, + 0x3a62b090, 0x61e39651, 0x903993fb, 0x59269b44, 0xa8fc9eee, + 0x10698c7b, 0xe1b389d1, 0x28ac816e, 0xd97684c4, 0xa51c113e, + 0x54c61494, 0x9dd91c2b, 0x6c031981, 0xd4960b14, 0x254c0ebe, + 0xec530601, 0x1d8903ab, 0x4608256a, 0xb7d220c0, 0x7ecd287f, + 0x8f172dd5, 0x37823f40, 0xc6583aea, 0x0f473255, 0xfe9d37ff, + 0xb8457fd7, 0x499f7a7d, 0x808072c2, 0x715a7768, 0xc9cf65fd, + 0x38156057, 0xf10a68e8, 0x00d06d42, 0x5b514b83, 0xaa8b4e29, + 0x63944696, 0x924e433c, 0x2adb51a9, 0xdb015403, 0x121e5cbc, + 0xe3c45916}, + {0x00000000, 0x0ee7e8d1, 0x1dcfd1a2, 0x13283973, 0x3b9fa344, + 0x35784b95, 0x265072e6, 0x28b79a37, 0x773f4688, 0x79d8ae59, + 0x6af0972a, 0x64177ffb, 0x4ca0e5cc, 0x42470d1d, 0x516f346e, + 0x5f88dcbf, 0xee7e8d10, 0xe09965c1, 0xf3b15cb2, 0xfd56b463, + 0xd5e12e54, 0xdb06c685, 0xc82efff6, 0xc6c91727, 0x9941cb98, + 0x97a62349, 0x848e1a3a, 0x8a69f2eb, 0xa2de68dc, 0xac39800d, + 0xbf11b97e, 0xb1f651af, 0x078c1c61, 0x096bf4b0, 0x1a43cdc3, + 0x14a42512, 0x3c13bf25, 0x32f457f4, 0x21dc6e87, 0x2f3b8656, + 0x70b35ae9, 0x7e54b238, 0x6d7c8b4b, 0x639b639a, 0x4b2cf9ad, + 0x45cb117c, 0x56e3280f, 0x5804c0de, 
0xe9f29171, 0xe71579a0, + 0xf43d40d3, 0xfadaa802, 0xd26d3235, 0xdc8adae4, 0xcfa2e397, + 0xc1450b46, 0x9ecdd7f9, 0x902a3f28, 0x8302065b, 0x8de5ee8a, + 0xa55274bd, 0xabb59c6c, 0xb89da51f, 0xb67a4dce, 0x0f1838c2, + 0x01ffd013, 0x12d7e960, 0x1c3001b1, 0x34879b86, 0x3a607357, + 0x29484a24, 0x27afa2f5, 0x78277e4a, 0x76c0969b, 0x65e8afe8, + 0x6b0f4739, 0x43b8dd0e, 0x4d5f35df, 0x5e770cac, 0x5090e47d, + 0xe166b5d2, 0xef815d03, 0xfca96470, 0xf24e8ca1, 0xdaf91696, + 0xd41efe47, 0xc736c734, 0xc9d12fe5, 0x9659f35a, 0x98be1b8b, + 0x8b9622f8, 0x8571ca29, 0xadc6501e, 0xa321b8cf, 0xb00981bc, + 0xbeee696d, 0x089424a3, 0x0673cc72, 0x155bf501, 0x1bbc1dd0, + 0x330b87e7, 0x3dec6f36, 0x2ec45645, 0x2023be94, 0x7fab622b, + 0x714c8afa, 0x6264b389, 0x6c835b58, 0x4434c16f, 0x4ad329be, + 0x59fb10cd, 0x571cf81c, 0xe6eaa9b3, 0xe80d4162, 0xfb257811, + 0xf5c290c0, 0xdd750af7, 0xd392e226, 0xc0badb55, 0xce5d3384, + 0x91d5ef3b, 0x9f3207ea, 0x8c1a3e99, 0x82fdd648, 0xaa4a4c7f, + 0xa4ada4ae, 0xb7859ddd, 0xb962750c, 0x1e307184, 0x10d79955, + 0x03ffa026, 0x0d1848f7, 0x25afd2c0, 0x2b483a11, 0x38600362, + 0x3687ebb3, 0x690f370c, 0x67e8dfdd, 0x74c0e6ae, 0x7a270e7f, + 0x52909448, 0x5c777c99, 0x4f5f45ea, 0x41b8ad3b, 0xf04efc94, + 0xfea91445, 0xed812d36, 0xe366c5e7, 0xcbd15fd0, 0xc536b701, + 0xd61e8e72, 0xd8f966a3, 0x8771ba1c, 0x899652cd, 0x9abe6bbe, + 0x9459836f, 0xbcee1958, 0xb209f189, 0xa121c8fa, 0xafc6202b, + 0x19bc6de5, 0x175b8534, 0x0473bc47, 0x0a945496, 0x2223cea1, + 0x2cc42670, 0x3fec1f03, 0x310bf7d2, 0x6e832b6d, 0x6064c3bc, + 0x734cfacf, 0x7dab121e, 0x551c8829, 0x5bfb60f8, 0x48d3598b, + 0x4634b15a, 0xf7c2e0f5, 0xf9250824, 0xea0d3157, 0xe4ead986, + 0xcc5d43b1, 0xc2baab60, 0xd1929213, 0xdf757ac2, 0x80fda67d, + 0x8e1a4eac, 0x9d3277df, 0x93d59f0e, 0xbb620539, 0xb585ede8, + 0xa6add49b, 0xa84a3c4a, 0x11284946, 0x1fcfa197, 0x0ce798e4, + 0x02007035, 0x2ab7ea02, 0x245002d3, 0x37783ba0, 0x399fd371, + 0x66170fce, 0x68f0e71f, 0x7bd8de6c, 0x753f36bd, 0x5d88ac8a, + 0x536f445b, 0x40477d28, 0x4ea095f9, 0xff56c456, 0xf1b12c87, + 0xe29915f4, 0xec7efd25, 0xc4c96712, 0xca2e8fc3, 0xd906b6b0, + 0xd7e15e61, 0x886982de, 0x868e6a0f, 0x95a6537c, 0x9b41bbad, + 0xb3f6219a, 0xbd11c94b, 0xae39f038, 0xa0de18e9, 0x16a45527, + 0x1843bdf6, 0x0b6b8485, 0x058c6c54, 0x2d3bf663, 0x23dc1eb2, + 0x30f427c1, 0x3e13cf10, 0x619b13af, 0x6f7cfb7e, 0x7c54c20d, + 0x72b32adc, 0x5a04b0eb, 0x54e3583a, 0x47cb6149, 0x492c8998, + 0xf8dad837, 0xf63d30e6, 0xe5150995, 0xebf2e144, 0xc3457b73, + 0xcda293a2, 0xde8aaad1, 0xd06d4200, 0x8fe59ebf, 0x8102766e, + 0x922a4f1d, 0x9ccda7cc, 0xb47a3dfb, 0xba9dd52a, 0xa9b5ec59, + 0xa7520488}, + {0x00000000, 0x3c60e308, 0x78c1c610, 0x44a12518, 0xf1838c20, + 0xcde36f28, 0x89424a30, 0xb522a938, 0x38761e01, 0x0416fd09, + 0x40b7d811, 0x7cd73b19, 0xc9f59221, 0xf5957129, 0xb1345431, + 0x8d54b739, 0x70ec3c02, 0x4c8cdf0a, 0x082dfa12, 0x344d191a, + 0x816fb022, 0xbd0f532a, 0xf9ae7632, 0xc5ce953a, 0x489a2203, + 0x74fac10b, 0x305be413, 0x0c3b071b, 0xb919ae23, 0x85794d2b, + 0xc1d86833, 0xfdb88b3b, 0xe1d87804, 0xddb89b0c, 0x9919be14, + 0xa5795d1c, 0x105bf424, 0x2c3b172c, 0x689a3234, 0x54fad13c, + 0xd9ae6605, 0xe5ce850d, 0xa16fa015, 0x9d0f431d, 0x282dea25, + 0x144d092d, 0x50ec2c35, 0x6c8ccf3d, 0x91344406, 0xad54a70e, + 0xe9f58216, 0xd595611e, 0x60b7c826, 0x5cd72b2e, 0x18760e36, + 0x2416ed3e, 0xa9425a07, 0x9522b90f, 0xd1839c17, 0xede37f1f, + 0x58c1d627, 0x64a1352f, 0x20001037, 0x1c60f33f, 0x18c1f649, + 0x24a11541, 0x60003059, 0x5c60d351, 0xe9427a69, 0xd5229961, + 0x9183bc79, 0xade35f71, 0x20b7e848, 0x1cd70b40, 0x58762e58, + 0x6416cd50, 0xd1346468, 0xed548760, 
0xa9f5a278, 0x95954170, + 0x682dca4b, 0x544d2943, 0x10ec0c5b, 0x2c8cef53, 0x99ae466b, + 0xa5cea563, 0xe16f807b, 0xdd0f6373, 0x505bd44a, 0x6c3b3742, + 0x289a125a, 0x14faf152, 0xa1d8586a, 0x9db8bb62, 0xd9199e7a, + 0xe5797d72, 0xf9198e4d, 0xc5796d45, 0x81d8485d, 0xbdb8ab55, + 0x089a026d, 0x34fae165, 0x705bc47d, 0x4c3b2775, 0xc16f904c, + 0xfd0f7344, 0xb9ae565c, 0x85ceb554, 0x30ec1c6c, 0x0c8cff64, + 0x482dda7c, 0x744d3974, 0x89f5b24f, 0xb5955147, 0xf134745f, + 0xcd549757, 0x78763e6f, 0x4416dd67, 0x00b7f87f, 0x3cd71b77, + 0xb183ac4e, 0x8de34f46, 0xc9426a5e, 0xf5228956, 0x4000206e, + 0x7c60c366, 0x38c1e67e, 0x04a10576, 0x3183ec92, 0x0de30f9a, + 0x49422a82, 0x7522c98a, 0xc00060b2, 0xfc6083ba, 0xb8c1a6a2, + 0x84a145aa, 0x09f5f293, 0x3595119b, 0x71343483, 0x4d54d78b, + 0xf8767eb3, 0xc4169dbb, 0x80b7b8a3, 0xbcd75bab, 0x416fd090, + 0x7d0f3398, 0x39ae1680, 0x05cef588, 0xb0ec5cb0, 0x8c8cbfb8, + 0xc82d9aa0, 0xf44d79a8, 0x7919ce91, 0x45792d99, 0x01d80881, + 0x3db8eb89, 0x889a42b1, 0xb4faa1b9, 0xf05b84a1, 0xcc3b67a9, + 0xd05b9496, 0xec3b779e, 0xa89a5286, 0x94fab18e, 0x21d818b6, + 0x1db8fbbe, 0x5919dea6, 0x65793dae, 0xe82d8a97, 0xd44d699f, + 0x90ec4c87, 0xac8caf8f, 0x19ae06b7, 0x25cee5bf, 0x616fc0a7, + 0x5d0f23af, 0xa0b7a894, 0x9cd74b9c, 0xd8766e84, 0xe4168d8c, + 0x513424b4, 0x6d54c7bc, 0x29f5e2a4, 0x159501ac, 0x98c1b695, + 0xa4a1559d, 0xe0007085, 0xdc60938d, 0x69423ab5, 0x5522d9bd, + 0x1183fca5, 0x2de31fad, 0x29421adb, 0x1522f9d3, 0x5183dccb, + 0x6de33fc3, 0xd8c196fb, 0xe4a175f3, 0xa00050eb, 0x9c60b3e3, + 0x113404da, 0x2d54e7d2, 0x69f5c2ca, 0x559521c2, 0xe0b788fa, + 0xdcd76bf2, 0x98764eea, 0xa416ade2, 0x59ae26d9, 0x65cec5d1, + 0x216fe0c9, 0x1d0f03c1, 0xa82daaf9, 0x944d49f1, 0xd0ec6ce9, + 0xec8c8fe1, 0x61d838d8, 0x5db8dbd0, 0x1919fec8, 0x25791dc0, + 0x905bb4f8, 0xac3b57f0, 0xe89a72e8, 0xd4fa91e0, 0xc89a62df, + 0xf4fa81d7, 0xb05ba4cf, 0x8c3b47c7, 0x3919eeff, 0x05790df7, + 0x41d828ef, 0x7db8cbe7, 0xf0ec7cde, 0xcc8c9fd6, 0x882dbace, + 0xb44d59c6, 0x016ff0fe, 0x3d0f13f6, 0x79ae36ee, 0x45ced5e6, + 0xb8765edd, 0x8416bdd5, 0xc0b798cd, 0xfcd77bc5, 0x49f5d2fd, + 0x759531f5, 0x313414ed, 0x0d54f7e5, 0x800040dc, 0xbc60a3d4, + 0xf8c186cc, 0xc4a165c4, 0x7183ccfc, 0x4de32ff4, 0x09420aec, + 0x3522e9e4}, + {0x00000000, 0x6307d924, 0xc60fb248, 0xa5086b6c, 0x576e62d1, + 0x3469bbf5, 0x9161d099, 0xf26609bd, 0xaedcc5a2, 0xcddb1c86, + 0x68d377ea, 0x0bd4aece, 0xf9b2a773, 0x9ab57e57, 0x3fbd153b, + 0x5cbacc1f, 0x86c88d05, 0xe5cf5421, 0x40c73f4d, 0x23c0e669, + 0xd1a6efd4, 0xb2a136f0, 0x17a95d9c, 0x74ae84b8, 0x281448a7, + 0x4b139183, 0xee1bfaef, 0x8d1c23cb, 0x7f7a2a76, 0x1c7df352, + 0xb975983e, 0xda72411a, 0xd6e01c4b, 0xb5e7c56f, 0x10efae03, + 0x73e87727, 0x818e7e9a, 0xe289a7be, 0x4781ccd2, 0x248615f6, + 0x783cd9e9, 0x1b3b00cd, 0xbe336ba1, 0xdd34b285, 0x2f52bb38, + 0x4c55621c, 0xe95d0970, 0x8a5ad054, 0x5028914e, 0x332f486a, + 0x96272306, 0xf520fa22, 0x0746f39f, 0x64412abb, 0xc14941d7, + 0xa24e98f3, 0xfef454ec, 0x9df38dc8, 0x38fbe6a4, 0x5bfc3f80, + 0xa99a363d, 0xca9def19, 0x6f958475, 0x0c925d51, 0x76b13ed7, + 0x15b6e7f3, 0xb0be8c9f, 0xd3b955bb, 0x21df5c06, 0x42d88522, + 0xe7d0ee4e, 0x84d7376a, 0xd86dfb75, 0xbb6a2251, 0x1e62493d, + 0x7d659019, 0x8f0399a4, 0xec044080, 0x490c2bec, 0x2a0bf2c8, + 0xf079b3d2, 0x937e6af6, 0x3676019a, 0x5571d8be, 0xa717d103, + 0xc4100827, 0x6118634b, 0x021fba6f, 0x5ea57670, 0x3da2af54, + 0x98aac438, 0xfbad1d1c, 0x09cb14a1, 0x6acccd85, 0xcfc4a6e9, + 0xacc37fcd, 0xa051229c, 0xc356fbb8, 0x665e90d4, 0x055949f0, + 0xf73f404d, 0x94389969, 0x3130f205, 0x52372b21, 0x0e8de73e, + 0x6d8a3e1a, 0xc8825576, 0xab858c52, 
0x59e385ef, 0x3ae45ccb, + 0x9fec37a7, 0xfcebee83, 0x2699af99, 0x459e76bd, 0xe0961dd1, + 0x8391c4f5, 0x71f7cd48, 0x12f0146c, 0xb7f87f00, 0xd4ffa624, + 0x88456a3b, 0xeb42b31f, 0x4e4ad873, 0x2d4d0157, 0xdf2b08ea, + 0xbc2cd1ce, 0x1924baa2, 0x7a236386, 0xed627dae, 0x8e65a48a, + 0x2b6dcfe6, 0x486a16c2, 0xba0c1f7f, 0xd90bc65b, 0x7c03ad37, + 0x1f047413, 0x43beb80c, 0x20b96128, 0x85b10a44, 0xe6b6d360, + 0x14d0dadd, 0x77d703f9, 0xd2df6895, 0xb1d8b1b1, 0x6baaf0ab, + 0x08ad298f, 0xada542e3, 0xcea29bc7, 0x3cc4927a, 0x5fc34b5e, + 0xfacb2032, 0x99ccf916, 0xc5763509, 0xa671ec2d, 0x03798741, + 0x607e5e65, 0x921857d8, 0xf11f8efc, 0x5417e590, 0x37103cb4, + 0x3b8261e5, 0x5885b8c1, 0xfd8dd3ad, 0x9e8a0a89, 0x6cec0334, + 0x0febda10, 0xaae3b17c, 0xc9e46858, 0x955ea447, 0xf6597d63, + 0x5351160f, 0x3056cf2b, 0xc230c696, 0xa1371fb2, 0x043f74de, + 0x6738adfa, 0xbd4aece0, 0xde4d35c4, 0x7b455ea8, 0x1842878c, + 0xea248e31, 0x89235715, 0x2c2b3c79, 0x4f2ce55d, 0x13962942, + 0x7091f066, 0xd5999b0a, 0xb69e422e, 0x44f84b93, 0x27ff92b7, + 0x82f7f9db, 0xe1f020ff, 0x9bd34379, 0xf8d49a5d, 0x5ddcf131, + 0x3edb2815, 0xccbd21a8, 0xafbaf88c, 0x0ab293e0, 0x69b54ac4, + 0x350f86db, 0x56085fff, 0xf3003493, 0x9007edb7, 0x6261e40a, + 0x01663d2e, 0xa46e5642, 0xc7698f66, 0x1d1bce7c, 0x7e1c1758, + 0xdb147c34, 0xb813a510, 0x4a75acad, 0x29727589, 0x8c7a1ee5, + 0xef7dc7c1, 0xb3c70bde, 0xd0c0d2fa, 0x75c8b996, 0x16cf60b2, + 0xe4a9690f, 0x87aeb02b, 0x22a6db47, 0x41a10263, 0x4d335f32, + 0x2e348616, 0x8b3ced7a, 0xe83b345e, 0x1a5d3de3, 0x795ae4c7, + 0xdc528fab, 0xbf55568f, 0xe3ef9a90, 0x80e843b4, 0x25e028d8, + 0x46e7f1fc, 0xb481f841, 0xd7862165, 0x728e4a09, 0x1189932d, + 0xcbfbd237, 0xa8fc0b13, 0x0df4607f, 0x6ef3b95b, 0x9c95b0e6, + 0xff9269c2, 0x5a9a02ae, 0x399ddb8a, 0x65271795, 0x0620ceb1, + 0xa328a5dd, 0xc02f7cf9, 0x32497544, 0x514eac60, 0xf446c70c, + 0x97411e28}, + {0x00000000, 0x01b5fd1d, 0x036bfa3a, 0x02de0727, 0x06d7f474, + 0x07620969, 0x05bc0e4e, 0x0409f353, 0x0dafe8e8, 0x0c1a15f5, + 0x0ec412d2, 0x0f71efcf, 0x0b781c9c, 0x0acde181, 0x0813e6a6, + 0x09a61bbb, 0x1b5fd1d0, 0x1aea2ccd, 0x18342bea, 0x1981d6f7, + 0x1d8825a4, 0x1c3dd8b9, 0x1ee3df9e, 0x1f562283, 0x16f03938, + 0x1745c425, 0x159bc302, 0x142e3e1f, 0x1027cd4c, 0x11923051, + 0x134c3776, 0x12f9ca6b, 0x36bfa3a0, 0x370a5ebd, 0x35d4599a, + 0x3461a487, 0x306857d4, 0x31ddaac9, 0x3303adee, 0x32b650f3, + 0x3b104b48, 0x3aa5b655, 0x387bb172, 0x39ce4c6f, 0x3dc7bf3c, + 0x3c724221, 0x3eac4506, 0x3f19b81b, 0x2de07270, 0x2c558f6d, + 0x2e8b884a, 0x2f3e7557, 0x2b378604, 0x2a827b19, 0x285c7c3e, + 0x29e98123, 0x204f9a98, 0x21fa6785, 0x232460a2, 0x22919dbf, + 0x26986eec, 0x272d93f1, 0x25f394d6, 0x244669cb, 0x6d7f4740, + 0x6ccaba5d, 0x6e14bd7a, 0x6fa14067, 0x6ba8b334, 0x6a1d4e29, + 0x68c3490e, 0x6976b413, 0x60d0afa8, 0x616552b5, 0x63bb5592, + 0x620ea88f, 0x66075bdc, 0x67b2a6c1, 0x656ca1e6, 0x64d95cfb, + 0x76209690, 0x77956b8d, 0x754b6caa, 0x74fe91b7, 0x70f762e4, + 0x71429ff9, 0x739c98de, 0x722965c3, 0x7b8f7e78, 0x7a3a8365, + 0x78e48442, 0x7951795f, 0x7d588a0c, 0x7ced7711, 0x7e337036, + 0x7f868d2b, 0x5bc0e4e0, 0x5a7519fd, 0x58ab1eda, 0x591ee3c7, + 0x5d171094, 0x5ca2ed89, 0x5e7ceaae, 0x5fc917b3, 0x566f0c08, + 0x57daf115, 0x5504f632, 0x54b10b2f, 0x50b8f87c, 0x510d0561, + 0x53d30246, 0x5266ff5b, 0x409f3530, 0x412ac82d, 0x43f4cf0a, + 0x42413217, 0x4648c144, 0x47fd3c59, 0x45233b7e, 0x4496c663, + 0x4d30ddd8, 0x4c8520c5, 0x4e5b27e2, 0x4feedaff, 0x4be729ac, + 0x4a52d4b1, 0x488cd396, 0x49392e8b, 0xdafe8e80, 0xdb4b739d, + 0xd99574ba, 0xd82089a7, 0xdc297af4, 0xdd9c87e9, 0xdf4280ce, + 0xdef77dd3, 0xd7516668, 0xd6e49b75, 
0xd43a9c52, 0xd58f614f, + 0xd186921c, 0xd0336f01, 0xd2ed6826, 0xd358953b, 0xc1a15f50, + 0xc014a24d, 0xc2caa56a, 0xc37f5877, 0xc776ab24, 0xc6c35639, + 0xc41d511e, 0xc5a8ac03, 0xcc0eb7b8, 0xcdbb4aa5, 0xcf654d82, + 0xced0b09f, 0xcad943cc, 0xcb6cbed1, 0xc9b2b9f6, 0xc80744eb, + 0xec412d20, 0xedf4d03d, 0xef2ad71a, 0xee9f2a07, 0xea96d954, + 0xeb232449, 0xe9fd236e, 0xe848de73, 0xe1eec5c8, 0xe05b38d5, + 0xe2853ff2, 0xe330c2ef, 0xe73931bc, 0xe68ccca1, 0xe452cb86, + 0xe5e7369b, 0xf71efcf0, 0xf6ab01ed, 0xf47506ca, 0xf5c0fbd7, + 0xf1c90884, 0xf07cf599, 0xf2a2f2be, 0xf3170fa3, 0xfab11418, + 0xfb04e905, 0xf9daee22, 0xf86f133f, 0xfc66e06c, 0xfdd31d71, + 0xff0d1a56, 0xfeb8e74b, 0xb781c9c0, 0xb63434dd, 0xb4ea33fa, + 0xb55fcee7, 0xb1563db4, 0xb0e3c0a9, 0xb23dc78e, 0xb3883a93, + 0xba2e2128, 0xbb9bdc35, 0xb945db12, 0xb8f0260f, 0xbcf9d55c, + 0xbd4c2841, 0xbf922f66, 0xbe27d27b, 0xacde1810, 0xad6be50d, + 0xafb5e22a, 0xae001f37, 0xaa09ec64, 0xabbc1179, 0xa962165e, + 0xa8d7eb43, 0xa171f0f8, 0xa0c40de5, 0xa21a0ac2, 0xa3aff7df, + 0xa7a6048c, 0xa613f991, 0xa4cdfeb6, 0xa57803ab, 0x813e6a60, + 0x808b977d, 0x8255905a, 0x83e06d47, 0x87e99e14, 0x865c6309, + 0x8482642e, 0x85379933, 0x8c918288, 0x8d247f95, 0x8ffa78b2, + 0x8e4f85af, 0x8a4676fc, 0x8bf38be1, 0x892d8cc6, 0x889871db, + 0x9a61bbb0, 0x9bd446ad, 0x990a418a, 0x98bfbc97, 0x9cb64fc4, + 0x9d03b2d9, 0x9fddb5fe, 0x9e6848e3, 0x97ce5358, 0x967bae45, + 0x94a5a962, 0x9510547f, 0x9119a72c, 0x90ac5a31, 0x92725d16, + 0x93c7a00b}, + {0x00000000, 0x6e8c1b41, 0xdd183682, 0xb3942dc3, 0x61416b45, + 0x0fcd7004, 0xbc595dc7, 0xd2d54686, 0xc282d68a, 0xac0ecdcb, + 0x1f9ae008, 0x7116fb49, 0xa3c3bdcf, 0xcd4fa68e, 0x7edb8b4d, + 0x1057900c, 0x5e74ab55, 0x30f8b014, 0x836c9dd7, 0xede08696, + 0x3f35c010, 0x51b9db51, 0xe22df692, 0x8ca1edd3, 0x9cf67ddf, + 0xf27a669e, 0x41ee4b5d, 0x2f62501c, 0xfdb7169a, 0x933b0ddb, + 0x20af2018, 0x4e233b59, 0xbce956aa, 0xd2654deb, 0x61f16028, + 0x0f7d7b69, 0xdda83def, 0xb32426ae, 0x00b00b6d, 0x6e3c102c, + 0x7e6b8020, 0x10e79b61, 0xa373b6a2, 0xcdffade3, 0x1f2aeb65, + 0x71a6f024, 0xc232dde7, 0xacbec6a6, 0xe29dfdff, 0x8c11e6be, + 0x3f85cb7d, 0x5109d03c, 0x83dc96ba, 0xed508dfb, 0x5ec4a038, + 0x3048bb79, 0x201f2b75, 0x4e933034, 0xfd071df7, 0x938b06b6, + 0x415e4030, 0x2fd25b71, 0x9c4676b2, 0xf2ca6df3, 0xa2a3ab15, + 0xcc2fb054, 0x7fbb9d97, 0x113786d6, 0xc3e2c050, 0xad6edb11, + 0x1efaf6d2, 0x7076ed93, 0x60217d9f, 0x0ead66de, 0xbd394b1d, + 0xd3b5505c, 0x016016da, 0x6fec0d9b, 0xdc782058, 0xb2f43b19, + 0xfcd70040, 0x925b1b01, 0x21cf36c2, 0x4f432d83, 0x9d966b05, + 0xf31a7044, 0x408e5d87, 0x2e0246c6, 0x3e55d6ca, 0x50d9cd8b, + 0xe34de048, 0x8dc1fb09, 0x5f14bd8f, 0x3198a6ce, 0x820c8b0d, + 0xec80904c, 0x1e4afdbf, 0x70c6e6fe, 0xc352cb3d, 0xadded07c, + 0x7f0b96fa, 0x11878dbb, 0xa213a078, 0xcc9fbb39, 0xdcc82b35, + 0xb2443074, 0x01d01db7, 0x6f5c06f6, 0xbd894070, 0xd3055b31, + 0x609176f2, 0x0e1d6db3, 0x403e56ea, 0x2eb24dab, 0x9d266068, + 0xf3aa7b29, 0x217f3daf, 0x4ff326ee, 0xfc670b2d, 0x92eb106c, + 0x82bc8060, 0xec309b21, 0x5fa4b6e2, 0x3128ada3, 0xe3fdeb25, + 0x8d71f064, 0x3ee5dda7, 0x5069c6e6, 0x9e36506b, 0xf0ba4b2a, + 0x432e66e9, 0x2da27da8, 0xff773b2e, 0x91fb206f, 0x226f0dac, + 0x4ce316ed, 0x5cb486e1, 0x32389da0, 0x81acb063, 0xef20ab22, + 0x3df5eda4, 0x5379f6e5, 0xe0eddb26, 0x8e61c067, 0xc042fb3e, + 0xaecee07f, 0x1d5acdbc, 0x73d6d6fd, 0xa103907b, 0xcf8f8b3a, + 0x7c1ba6f9, 0x1297bdb8, 0x02c02db4, 0x6c4c36f5, 0xdfd81b36, + 0xb1540077, 0x638146f1, 0x0d0d5db0, 0xbe997073, 0xd0156b32, + 0x22df06c1, 0x4c531d80, 0xffc73043, 0x914b2b02, 0x439e6d84, + 0x2d1276c5, 0x9e865b06, 0xf00a4047, 
0xe05dd04b, 0x8ed1cb0a, + 0x3d45e6c9, 0x53c9fd88, 0x811cbb0e, 0xef90a04f, 0x5c048d8c, + 0x328896cd, 0x7cabad94, 0x1227b6d5, 0xa1b39b16, 0xcf3f8057, + 0x1deac6d1, 0x7366dd90, 0xc0f2f053, 0xae7eeb12, 0xbe297b1e, + 0xd0a5605f, 0x63314d9c, 0x0dbd56dd, 0xdf68105b, 0xb1e40b1a, + 0x027026d9, 0x6cfc3d98, 0x3c95fb7e, 0x5219e03f, 0xe18dcdfc, + 0x8f01d6bd, 0x5dd4903b, 0x33588b7a, 0x80cca6b9, 0xee40bdf8, + 0xfe172df4, 0x909b36b5, 0x230f1b76, 0x4d830037, 0x9f5646b1, + 0xf1da5df0, 0x424e7033, 0x2cc26b72, 0x62e1502b, 0x0c6d4b6a, + 0xbff966a9, 0xd1757de8, 0x03a03b6e, 0x6d2c202f, 0xdeb80dec, + 0xb03416ad, 0xa06386a1, 0xceef9de0, 0x7d7bb023, 0x13f7ab62, + 0xc122ede4, 0xafaef6a5, 0x1c3adb66, 0x72b6c027, 0x807cadd4, + 0xeef0b695, 0x5d649b56, 0x33e88017, 0xe13dc691, 0x8fb1ddd0, + 0x3c25f013, 0x52a9eb52, 0x42fe7b5e, 0x2c72601f, 0x9fe64ddc, + 0xf16a569d, 0x23bf101b, 0x4d330b5a, 0xfea72699, 0x902b3dd8, + 0xde080681, 0xb0841dc0, 0x03103003, 0x6d9c2b42, 0xbf496dc4, + 0xd1c57685, 0x62515b46, 0x0cdd4007, 0x1c8ad00b, 0x7206cb4a, + 0xc192e689, 0xaf1efdc8, 0x7dcbbb4e, 0x1347a00f, 0xa0d38dcc, + 0xce5f968d}, + {0x00000000, 0xe71da697, 0x154a4b6f, 0xf257edf8, 0x2a9496de, + 0xcd893049, 0x3fdeddb1, 0xd8c37b26, 0x55292dbc, 0xb2348b2b, + 0x406366d3, 0xa77ec044, 0x7fbdbb62, 0x98a01df5, 0x6af7f00d, + 0x8dea569a, 0xaa525b78, 0x4d4ffdef, 0xbf181017, 0x5805b680, + 0x80c6cda6, 0x67db6b31, 0x958c86c9, 0x7291205e, 0xff7b76c4, + 0x1866d053, 0xea313dab, 0x0d2c9b3c, 0xd5efe01a, 0x32f2468d, + 0xc0a5ab75, 0x27b80de2, 0x8fd5b0b1, 0x68c81626, 0x9a9ffbde, + 0x7d825d49, 0xa541266f, 0x425c80f8, 0xb00b6d00, 0x5716cb97, + 0xdafc9d0d, 0x3de13b9a, 0xcfb6d662, 0x28ab70f5, 0xf0680bd3, + 0x1775ad44, 0xe52240bc, 0x023fe62b, 0x2587ebc9, 0xc29a4d5e, + 0x30cda0a6, 0xd7d00631, 0x0f137d17, 0xe80edb80, 0x1a593678, + 0xfd4490ef, 0x70aec675, 0x97b360e2, 0x65e48d1a, 0x82f92b8d, + 0x5a3a50ab, 0xbd27f63c, 0x4f701bc4, 0xa86dbd53, 0xc4da6723, + 0x23c7c1b4, 0xd1902c4c, 0x368d8adb, 0xee4ef1fd, 0x0953576a, + 0xfb04ba92, 0x1c191c05, 0x91f34a9f, 0x76eeec08, 0x84b901f0, + 0x63a4a767, 0xbb67dc41, 0x5c7a7ad6, 0xae2d972e, 0x493031b9, + 0x6e883c5b, 0x89959acc, 0x7bc27734, 0x9cdfd1a3, 0x441caa85, + 0xa3010c12, 0x5156e1ea, 0xb64b477d, 0x3ba111e7, 0xdcbcb770, + 0x2eeb5a88, 0xc9f6fc1f, 0x11358739, 0xf62821ae, 0x047fcc56, + 0xe3626ac1, 0x4b0fd792, 0xac127105, 0x5e459cfd, 0xb9583a6a, + 0x619b414c, 0x8686e7db, 0x74d10a23, 0x93ccacb4, 0x1e26fa2e, + 0xf93b5cb9, 0x0b6cb141, 0xec7117d6, 0x34b26cf0, 0xd3afca67, + 0x21f8279f, 0xc6e58108, 0xe15d8cea, 0x06402a7d, 0xf417c785, + 0x130a6112, 0xcbc91a34, 0x2cd4bca3, 0xde83515b, 0x399ef7cc, + 0xb474a156, 0x536907c1, 0xa13eea39, 0x46234cae, 0x9ee03788, + 0x79fd911f, 0x8baa7ce7, 0x6cb7da70, 0x52c5c807, 0xb5d86e90, + 0x478f8368, 0xa09225ff, 0x78515ed9, 0x9f4cf84e, 0x6d1b15b6, + 0x8a06b321, 0x07ece5bb, 0xe0f1432c, 0x12a6aed4, 0xf5bb0843, + 0x2d787365, 0xca65d5f2, 0x3832380a, 0xdf2f9e9d, 0xf897937f, + 0x1f8a35e8, 0xedddd810, 0x0ac07e87, 0xd20305a1, 0x351ea336, + 0xc7494ece, 0x2054e859, 0xadbebec3, 0x4aa31854, 0xb8f4f5ac, + 0x5fe9533b, 0x872a281d, 0x60378e8a, 0x92606372, 0x757dc5e5, + 0xdd1078b6, 0x3a0dde21, 0xc85a33d9, 0x2f47954e, 0xf784ee68, + 0x109948ff, 0xe2cea507, 0x05d30390, 0x8839550a, 0x6f24f39d, + 0x9d731e65, 0x7a6eb8f2, 0xa2adc3d4, 0x45b06543, 0xb7e788bb, + 0x50fa2e2c, 0x774223ce, 0x905f8559, 0x620868a1, 0x8515ce36, + 0x5dd6b510, 0xbacb1387, 0x489cfe7f, 0xaf8158e8, 0x226b0e72, + 0xc576a8e5, 0x3721451d, 0xd03ce38a, 0x08ff98ac, 0xefe23e3b, + 0x1db5d3c3, 0xfaa87554, 0x961faf24, 0x710209b3, 0x8355e44b, + 0x644842dc, 0xbc8b39fa, 0x5b969f6d, 
0xa9c17295, 0x4edcd402, + 0xc3368298, 0x242b240f, 0xd67cc9f7, 0x31616f60, 0xe9a21446, + 0x0ebfb2d1, 0xfce85f29, 0x1bf5f9be, 0x3c4df45c, 0xdb5052cb, + 0x2907bf33, 0xce1a19a4, 0x16d96282, 0xf1c4c415, 0x039329ed, + 0xe48e8f7a, 0x6964d9e0, 0x8e797f77, 0x7c2e928f, 0x9b333418, + 0x43f04f3e, 0xa4ede9a9, 0x56ba0451, 0xb1a7a2c6, 0x19ca1f95, + 0xfed7b902, 0x0c8054fa, 0xeb9df26d, 0x335e894b, 0xd4432fdc, + 0x2614c224, 0xc10964b3, 0x4ce33229, 0xabfe94be, 0x59a97946, + 0xbeb4dfd1, 0x6677a4f7, 0x816a0260, 0x733def98, 0x9420490f, + 0xb39844ed, 0x5485e27a, 0xa6d20f82, 0x41cfa915, 0x990cd233, + 0x7e1174a4, 0x8c46995c, 0x6b5b3fcb, 0xe6b16951, 0x01accfc6, + 0xf3fb223e, 0x14e684a9, 0xcc25ff8f, 0x2b385918, 0xd96fb4e0, + 0x3e721277}, + {0x00000000, 0xa58b900e, 0x9066265d, 0x35edb653, 0xfbbd4afb, + 0x5e36daf5, 0x6bdb6ca6, 0xce50fca8, 0x2c0b93b7, 0x898003b9, + 0xbc6db5ea, 0x19e625e4, 0xd7b6d94c, 0x723d4942, 0x47d0ff11, + 0xe25b6f1f, 0x5817276e, 0xfd9cb760, 0xc8710133, 0x6dfa913d, + 0xa3aa6d95, 0x0621fd9b, 0x33cc4bc8, 0x9647dbc6, 0x741cb4d9, + 0xd19724d7, 0xe47a9284, 0x41f1028a, 0x8fa1fe22, 0x2a2a6e2c, + 0x1fc7d87f, 0xba4c4871, 0xb02e4edc, 0x15a5ded2, 0x20486881, + 0x85c3f88f, 0x4b930427, 0xee189429, 0xdbf5227a, 0x7e7eb274, + 0x9c25dd6b, 0x39ae4d65, 0x0c43fb36, 0xa9c86b38, 0x67989790, + 0xc213079e, 0xf7feb1cd, 0x527521c3, 0xe83969b2, 0x4db2f9bc, + 0x785f4fef, 0xddd4dfe1, 0x13842349, 0xb60fb347, 0x83e20514, + 0x2669951a, 0xc432fa05, 0x61b96a0b, 0x5454dc58, 0xf1df4c56, + 0x3f8fb0fe, 0x9a0420f0, 0xafe996a3, 0x0a6206ad, 0xbb2d9bf9, + 0x1ea60bf7, 0x2b4bbda4, 0x8ec02daa, 0x4090d102, 0xe51b410c, + 0xd0f6f75f, 0x757d6751, 0x9726084e, 0x32ad9840, 0x07402e13, + 0xa2cbbe1d, 0x6c9b42b5, 0xc910d2bb, 0xfcfd64e8, 0x5976f4e6, + 0xe33abc97, 0x46b12c99, 0x735c9aca, 0xd6d70ac4, 0x1887f66c, + 0xbd0c6662, 0x88e1d031, 0x2d6a403f, 0xcf312f20, 0x6ababf2e, + 0x5f57097d, 0xfadc9973, 0x348c65db, 0x9107f5d5, 0xa4ea4386, + 0x0161d388, 0x0b03d525, 0xae88452b, 0x9b65f378, 0x3eee6376, + 0xf0be9fde, 0x55350fd0, 0x60d8b983, 0xc553298d, 0x27084692, + 0x8283d69c, 0xb76e60cf, 0x12e5f0c1, 0xdcb50c69, 0x793e9c67, + 0x4cd32a34, 0xe958ba3a, 0x5314f24b, 0xf69f6245, 0xc372d416, + 0x66f94418, 0xa8a9b8b0, 0x0d2228be, 0x38cf9eed, 0x9d440ee3, + 0x7f1f61fc, 0xda94f1f2, 0xef7947a1, 0x4af2d7af, 0x84a22b07, + 0x2129bb09, 0x14c40d5a, 0xb14f9d54, 0xad2a31b3, 0x08a1a1bd, + 0x3d4c17ee, 0x98c787e0, 0x56977b48, 0xf31ceb46, 0xc6f15d15, + 0x637acd1b, 0x8121a204, 0x24aa320a, 0x11478459, 0xb4cc1457, + 0x7a9ce8ff, 0xdf1778f1, 0xeafacea2, 0x4f715eac, 0xf53d16dd, + 0x50b686d3, 0x655b3080, 0xc0d0a08e, 0x0e805c26, 0xab0bcc28, + 0x9ee67a7b, 0x3b6dea75, 0xd936856a, 0x7cbd1564, 0x4950a337, + 0xecdb3339, 0x228bcf91, 0x87005f9f, 0xb2ede9cc, 0x176679c2, + 0x1d047f6f, 0xb88fef61, 0x8d625932, 0x28e9c93c, 0xe6b93594, + 0x4332a59a, 0x76df13c9, 0xd35483c7, 0x310fecd8, 0x94847cd6, + 0xa169ca85, 0x04e25a8b, 0xcab2a623, 0x6f39362d, 0x5ad4807e, + 0xff5f1070, 0x45135801, 0xe098c80f, 0xd5757e5c, 0x70feee52, + 0xbeae12fa, 0x1b2582f4, 0x2ec834a7, 0x8b43a4a9, 0x6918cbb6, + 0xcc935bb8, 0xf97eedeb, 0x5cf57de5, 0x92a5814d, 0x372e1143, + 0x02c3a710, 0xa748371e, 0x1607aa4a, 0xb38c3a44, 0x86618c17, + 0x23ea1c19, 0xedbae0b1, 0x483170bf, 0x7ddcc6ec, 0xd85756e2, + 0x3a0c39fd, 0x9f87a9f3, 0xaa6a1fa0, 0x0fe18fae, 0xc1b17306, + 0x643ae308, 0x51d7555b, 0xf45cc555, 0x4e108d24, 0xeb9b1d2a, + 0xde76ab79, 0x7bfd3b77, 0xb5adc7df, 0x102657d1, 0x25cbe182, + 0x8040718c, 0x621b1e93, 0xc7908e9d, 0xf27d38ce, 0x57f6a8c0, + 0x99a65468, 0x3c2dc466, 0x09c07235, 0xac4be23b, 0xa629e496, + 0x03a27498, 0x364fc2cb, 0x93c452c5, 
0x5d94ae6d, 0xf81f3e63, + 0xcdf28830, 0x6879183e, 0x8a227721, 0x2fa9e72f, 0x1a44517c, + 0xbfcfc172, 0x719f3dda, 0xd414add4, 0xe1f91b87, 0x44728b89, + 0xfe3ec3f8, 0x5bb553f6, 0x6e58e5a5, 0xcbd375ab, 0x05838903, + 0xa008190d, 0x95e5af5e, 0x306e3f50, 0xd235504f, 0x77bec041, + 0x42537612, 0xe7d8e61c, 0x29881ab4, 0x8c038aba, 0xb9ee3ce9, + 0x1c65ace7}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0x0e908ba500000000, 0x5d26669000000000, + 0x53b6ed3500000000, 0xfb4abdfb00000000, 0xf5da365e00000000, + 0xa66cdb6b00000000, 0xa8fc50ce00000000, 0xb7930b2c00000000, + 0xb903808900000000, 0xeab56dbc00000000, 0xe425e61900000000, + 0x4cd9b6d700000000, 0x42493d7200000000, 0x11ffd04700000000, + 0x1f6f5be200000000, 0x6e27175800000000, 0x60b79cfd00000000, + 0x330171c800000000, 0x3d91fa6d00000000, 0x956daaa300000000, + 0x9bfd210600000000, 0xc84bcc3300000000, 0xc6db479600000000, + 0xd9b41c7400000000, 0xd72497d100000000, 0x84927ae400000000, + 0x8a02f14100000000, 0x22fea18f00000000, 0x2c6e2a2a00000000, + 0x7fd8c71f00000000, 0x71484cba00000000, 0xdc4e2eb000000000, + 0xd2dea51500000000, 0x8168482000000000, 0x8ff8c38500000000, + 0x2704934b00000000, 0x299418ee00000000, 0x7a22f5db00000000, + 0x74b27e7e00000000, 0x6bdd259c00000000, 0x654dae3900000000, + 0x36fb430c00000000, 0x386bc8a900000000, 0x9097986700000000, + 0x9e0713c200000000, 0xcdb1fef700000000, 0xc321755200000000, + 0xb26939e800000000, 0xbcf9b24d00000000, 0xef4f5f7800000000, + 0xe1dfd4dd00000000, 0x4923841300000000, 0x47b30fb600000000, + 0x1405e28300000000, 0x1a95692600000000, 0x05fa32c400000000, + 0x0b6ab96100000000, 0x58dc545400000000, 0x564cdff100000000, + 0xfeb08f3f00000000, 0xf020049a00000000, 0xa396e9af00000000, + 0xad06620a00000000, 0xf99b2dbb00000000, 0xf70ba61e00000000, + 0xa4bd4b2b00000000, 0xaa2dc08e00000000, 0x02d1904000000000, + 0x0c411be500000000, 0x5ff7f6d000000000, 0x51677d7500000000, + 0x4e08269700000000, 0x4098ad3200000000, 0x132e400700000000, + 0x1dbecba200000000, 0xb5429b6c00000000, 0xbbd210c900000000, + 0xe864fdfc00000000, 0xe6f4765900000000, 0x97bc3ae300000000, + 0x992cb14600000000, 0xca9a5c7300000000, 0xc40ad7d600000000, + 0x6cf6871800000000, 0x62660cbd00000000, 0x31d0e18800000000, + 0x3f406a2d00000000, 0x202f31cf00000000, 0x2ebfba6a00000000, + 0x7d09575f00000000, 0x7399dcfa00000000, 0xdb658c3400000000, + 0xd5f5079100000000, 0x8643eaa400000000, 0x88d3610100000000, + 0x25d5030b00000000, 0x2b4588ae00000000, 0x78f3659b00000000, + 0x7663ee3e00000000, 0xde9fbef000000000, 0xd00f355500000000, + 0x83b9d86000000000, 0x8d2953c500000000, 0x9246082700000000, + 0x9cd6838200000000, 0xcf606eb700000000, 0xc1f0e51200000000, + 0x690cb5dc00000000, 0x679c3e7900000000, 0x342ad34c00000000, + 0x3aba58e900000000, 0x4bf2145300000000, 0x45629ff600000000, + 0x16d472c300000000, 0x1844f96600000000, 0xb0b8a9a800000000, + 0xbe28220d00000000, 0xed9ecf3800000000, 0xe30e449d00000000, + 0xfc611f7f00000000, 0xf2f194da00000000, 0xa14779ef00000000, + 0xafd7f24a00000000, 0x072ba28400000000, 0x09bb292100000000, + 0x5a0dc41400000000, 0x549d4fb100000000, 0xb3312aad00000000, + 0xbda1a10800000000, 0xee174c3d00000000, 0xe087c79800000000, + 0x487b975600000000, 0x46eb1cf300000000, 0x155df1c600000000, + 0x1bcd7a6300000000, 0x04a2218100000000, 0x0a32aa2400000000, + 0x5984471100000000, 0x5714ccb400000000, 0xffe89c7a00000000, + 0xf17817df00000000, 0xa2cefaea00000000, 0xac5e714f00000000, + 0xdd163df500000000, 0xd386b65000000000, 0x80305b6500000000, + 0x8ea0d0c000000000, 0x265c800e00000000, 0x28cc0bab00000000, + 0x7b7ae69e00000000, 0x75ea6d3b00000000, 
0x6a8536d900000000, + 0x6415bd7c00000000, 0x37a3504900000000, 0x3933dbec00000000, + 0x91cf8b2200000000, 0x9f5f008700000000, 0xcce9edb200000000, + 0xc279661700000000, 0x6f7f041d00000000, 0x61ef8fb800000000, + 0x3259628d00000000, 0x3cc9e92800000000, 0x9435b9e600000000, + 0x9aa5324300000000, 0xc913df7600000000, 0xc78354d300000000, + 0xd8ec0f3100000000, 0xd67c849400000000, 0x85ca69a100000000, + 0x8b5ae20400000000, 0x23a6b2ca00000000, 0x2d36396f00000000, + 0x7e80d45a00000000, 0x70105fff00000000, 0x0158134500000000, + 0x0fc898e000000000, 0x5c7e75d500000000, 0x52eefe7000000000, + 0xfa12aebe00000000, 0xf482251b00000000, 0xa734c82e00000000, + 0xa9a4438b00000000, 0xb6cb186900000000, 0xb85b93cc00000000, + 0xebed7ef900000000, 0xe57df55c00000000, 0x4d81a59200000000, + 0x43112e3700000000, 0x10a7c30200000000, 0x1e3748a700000000, + 0x4aaa071600000000, 0x443a8cb300000000, 0x178c618600000000, + 0x191cea2300000000, 0xb1e0baed00000000, 0xbf70314800000000, + 0xecc6dc7d00000000, 0xe25657d800000000, 0xfd390c3a00000000, + 0xf3a9879f00000000, 0xa01f6aaa00000000, 0xae8fe10f00000000, + 0x0673b1c100000000, 0x08e33a6400000000, 0x5b55d75100000000, + 0x55c55cf400000000, 0x248d104e00000000, 0x2a1d9beb00000000, + 0x79ab76de00000000, 0x773bfd7b00000000, 0xdfc7adb500000000, + 0xd157261000000000, 0x82e1cb2500000000, 0x8c71408000000000, + 0x931e1b6200000000, 0x9d8e90c700000000, 0xce387df200000000, + 0xc0a8f65700000000, 0x6854a69900000000, 0x66c42d3c00000000, + 0x3572c00900000000, 0x3be24bac00000000, 0x96e429a600000000, + 0x9874a20300000000, 0xcbc24f3600000000, 0xc552c49300000000, + 0x6dae945d00000000, 0x633e1ff800000000, 0x3088f2cd00000000, + 0x3e18796800000000, 0x2177228a00000000, 0x2fe7a92f00000000, + 0x7c51441a00000000, 0x72c1cfbf00000000, 0xda3d9f7100000000, + 0xd4ad14d400000000, 0x871bf9e100000000, 0x898b724400000000, + 0xf8c33efe00000000, 0xf653b55b00000000, 0xa5e5586e00000000, + 0xab75d3cb00000000, 0x0389830500000000, 0x0d1908a000000000, + 0x5eafe59500000000, 0x503f6e3000000000, 0x4f5035d200000000, + 0x41c0be7700000000, 0x1276534200000000, 0x1ce6d8e700000000, + 0xb41a882900000000, 0xba8a038c00000000, 0xe93ceeb900000000, + 0xe7ac651c00000000}, + {0x0000000000000000, 0x97a61de700000000, 0x6f4b4a1500000000, + 0xf8ed57f200000000, 0xde96942a00000000, 0x493089cd00000000, + 0xb1ddde3f00000000, 0x267bc3d800000000, 0xbc2d295500000000, + 0x2b8b34b200000000, 0xd366634000000000, 0x44c07ea700000000, + 0x62bbbd7f00000000, 0xf51da09800000000, 0x0df0f76a00000000, + 0x9a56ea8d00000000, 0x785b52aa00000000, 0xeffd4f4d00000000, + 0x171018bf00000000, 0x80b6055800000000, 0xa6cdc68000000000, + 0x316bdb6700000000, 0xc9868c9500000000, 0x5e20917200000000, + 0xc4767bff00000000, 0x53d0661800000000, 0xab3d31ea00000000, + 0x3c9b2c0d00000000, 0x1ae0efd500000000, 0x8d46f23200000000, + 0x75aba5c000000000, 0xe20db82700000000, 0xb1b0d58f00000000, + 0x2616c86800000000, 0xdefb9f9a00000000, 0x495d827d00000000, + 0x6f2641a500000000, 0xf8805c4200000000, 0x006d0bb000000000, + 0x97cb165700000000, 0x0d9dfcda00000000, 0x9a3be13d00000000, + 0x62d6b6cf00000000, 0xf570ab2800000000, 0xd30b68f000000000, + 0x44ad751700000000, 0xbc4022e500000000, 0x2be63f0200000000, + 0xc9eb872500000000, 0x5e4d9ac200000000, 0xa6a0cd3000000000, + 0x3106d0d700000000, 0x177d130f00000000, 0x80db0ee800000000, + 0x7836591a00000000, 0xef9044fd00000000, 0x75c6ae7000000000, + 0xe260b39700000000, 0x1a8de46500000000, 0x8d2bf98200000000, + 0xab503a5a00000000, 0x3cf627bd00000000, 0xc41b704f00000000, + 0x53bd6da800000000, 0x2367dac400000000, 0xb4c1c72300000000, + 0x4c2c90d100000000, 
0xdb8a8d3600000000, 0xfdf14eee00000000, + 0x6a57530900000000, 0x92ba04fb00000000, 0x051c191c00000000, + 0x9f4af39100000000, 0x08ecee7600000000, 0xf001b98400000000, + 0x67a7a46300000000, 0x41dc67bb00000000, 0xd67a7a5c00000000, + 0x2e972dae00000000, 0xb931304900000000, 0x5b3c886e00000000, + 0xcc9a958900000000, 0x3477c27b00000000, 0xa3d1df9c00000000, + 0x85aa1c4400000000, 0x120c01a300000000, 0xeae1565100000000, + 0x7d474bb600000000, 0xe711a13b00000000, 0x70b7bcdc00000000, + 0x885aeb2e00000000, 0x1ffcf6c900000000, 0x3987351100000000, + 0xae2128f600000000, 0x56cc7f0400000000, 0xc16a62e300000000, + 0x92d70f4b00000000, 0x057112ac00000000, 0xfd9c455e00000000, + 0x6a3a58b900000000, 0x4c419b6100000000, 0xdbe7868600000000, + 0x230ad17400000000, 0xb4accc9300000000, 0x2efa261e00000000, + 0xb95c3bf900000000, 0x41b16c0b00000000, 0xd61771ec00000000, + 0xf06cb23400000000, 0x67caafd300000000, 0x9f27f82100000000, + 0x0881e5c600000000, 0xea8c5de100000000, 0x7d2a400600000000, + 0x85c717f400000000, 0x12610a1300000000, 0x341ac9cb00000000, + 0xa3bcd42c00000000, 0x5b5183de00000000, 0xccf79e3900000000, + 0x56a174b400000000, 0xc107695300000000, 0x39ea3ea100000000, + 0xae4c234600000000, 0x8837e09e00000000, 0x1f91fd7900000000, + 0xe77caa8b00000000, 0x70dab76c00000000, 0x07c8c55200000000, + 0x906ed8b500000000, 0x68838f4700000000, 0xff2592a000000000, + 0xd95e517800000000, 0x4ef84c9f00000000, 0xb6151b6d00000000, + 0x21b3068a00000000, 0xbbe5ec0700000000, 0x2c43f1e000000000, + 0xd4aea61200000000, 0x4308bbf500000000, 0x6573782d00000000, + 0xf2d565ca00000000, 0x0a38323800000000, 0x9d9e2fdf00000000, + 0x7f9397f800000000, 0xe8358a1f00000000, 0x10d8dded00000000, + 0x877ec00a00000000, 0xa10503d200000000, 0x36a31e3500000000, + 0xce4e49c700000000, 0x59e8542000000000, 0xc3bebead00000000, + 0x5418a34a00000000, 0xacf5f4b800000000, 0x3b53e95f00000000, + 0x1d282a8700000000, 0x8a8e376000000000, 0x7263609200000000, + 0xe5c57d7500000000, 0xb67810dd00000000, 0x21de0d3a00000000, + 0xd9335ac800000000, 0x4e95472f00000000, 0x68ee84f700000000, + 0xff48991000000000, 0x07a5cee200000000, 0x9003d30500000000, + 0x0a55398800000000, 0x9df3246f00000000, 0x651e739d00000000, + 0xf2b86e7a00000000, 0xd4c3ada200000000, 0x4365b04500000000, + 0xbb88e7b700000000, 0x2c2efa5000000000, 0xce23427700000000, + 0x59855f9000000000, 0xa168086200000000, 0x36ce158500000000, + 0x10b5d65d00000000, 0x8713cbba00000000, 0x7ffe9c4800000000, + 0xe85881af00000000, 0x720e6b2200000000, 0xe5a876c500000000, + 0x1d45213700000000, 0x8ae33cd000000000, 0xac98ff0800000000, + 0x3b3ee2ef00000000, 0xc3d3b51d00000000, 0x5475a8fa00000000, + 0x24af1f9600000000, 0xb309027100000000, 0x4be4558300000000, + 0xdc42486400000000, 0xfa398bbc00000000, 0x6d9f965b00000000, + 0x9572c1a900000000, 0x02d4dc4e00000000, 0x988236c300000000, + 0x0f242b2400000000, 0xf7c97cd600000000, 0x606f613100000000, + 0x4614a2e900000000, 0xd1b2bf0e00000000, 0x295fe8fc00000000, + 0xbef9f51b00000000, 0x5cf44d3c00000000, 0xcb5250db00000000, + 0x33bf072900000000, 0xa4191ace00000000, 0x8262d91600000000, + 0x15c4c4f100000000, 0xed29930300000000, 0x7a8f8ee400000000, + 0xe0d9646900000000, 0x777f798e00000000, 0x8f922e7c00000000, + 0x1834339b00000000, 0x3e4ff04300000000, 0xa9e9eda400000000, + 0x5104ba5600000000, 0xc6a2a7b100000000, 0x951fca1900000000, + 0x02b9d7fe00000000, 0xfa54800c00000000, 0x6df29deb00000000, + 0x4b895e3300000000, 0xdc2f43d400000000, 0x24c2142600000000, + 0xb36409c100000000, 0x2932e34c00000000, 0xbe94feab00000000, + 0x4679a95900000000, 0xd1dfb4be00000000, 0xf7a4776600000000, + 0x60026a8100000000, 0x98ef3d7300000000, 
0x0f49209400000000, + 0xed4498b300000000, 0x7ae2855400000000, 0x820fd2a600000000, + 0x15a9cf4100000000, 0x33d20c9900000000, 0xa474117e00000000, + 0x5c99468c00000000, 0xcb3f5b6b00000000, 0x5169b1e600000000, + 0xc6cfac0100000000, 0x3e22fbf300000000, 0xa984e61400000000, + 0x8fff25cc00000000, 0x1859382b00000000, 0xe0b46fd900000000, + 0x7712723e00000000}, + {0x0000000000000000, 0x411b8c6e00000000, 0x823618dd00000000, + 0xc32d94b300000000, 0x456b416100000000, 0x0470cd0f00000000, + 0xc75d59bc00000000, 0x8646d5d200000000, 0x8ad682c200000000, + 0xcbcd0eac00000000, 0x08e09a1f00000000, 0x49fb167100000000, + 0xcfbdc3a300000000, 0x8ea64fcd00000000, 0x4d8bdb7e00000000, + 0x0c90571000000000, 0x55ab745e00000000, 0x14b0f83000000000, + 0xd79d6c8300000000, 0x9686e0ed00000000, 0x10c0353f00000000, + 0x51dbb95100000000, 0x92f62de200000000, 0xd3eda18c00000000, + 0xdf7df69c00000000, 0x9e667af200000000, 0x5d4bee4100000000, + 0x1c50622f00000000, 0x9a16b7fd00000000, 0xdb0d3b9300000000, + 0x1820af2000000000, 0x593b234e00000000, 0xaa56e9bc00000000, + 0xeb4d65d200000000, 0x2860f16100000000, 0x697b7d0f00000000, + 0xef3da8dd00000000, 0xae2624b300000000, 0x6d0bb00000000000, + 0x2c103c6e00000000, 0x20806b7e00000000, 0x619be71000000000, + 0xa2b673a300000000, 0xe3adffcd00000000, 0x65eb2a1f00000000, + 0x24f0a67100000000, 0xe7dd32c200000000, 0xa6c6beac00000000, + 0xfffd9de200000000, 0xbee6118c00000000, 0x7dcb853f00000000, + 0x3cd0095100000000, 0xba96dc8300000000, 0xfb8d50ed00000000, + 0x38a0c45e00000000, 0x79bb483000000000, 0x752b1f2000000000, + 0x3430934e00000000, 0xf71d07fd00000000, 0xb6068b9300000000, + 0x30405e4100000000, 0x715bd22f00000000, 0xb276469c00000000, + 0xf36dcaf200000000, 0x15aba3a200000000, 0x54b02fcc00000000, + 0x979dbb7f00000000, 0xd686371100000000, 0x50c0e2c300000000, + 0x11db6ead00000000, 0xd2f6fa1e00000000, 0x93ed767000000000, + 0x9f7d216000000000, 0xde66ad0e00000000, 0x1d4b39bd00000000, + 0x5c50b5d300000000, 0xda16600100000000, 0x9b0dec6f00000000, + 0x582078dc00000000, 0x193bf4b200000000, 0x4000d7fc00000000, + 0x011b5b9200000000, 0xc236cf2100000000, 0x832d434f00000000, + 0x056b969d00000000, 0x44701af300000000, 0x875d8e4000000000, + 0xc646022e00000000, 0xcad6553e00000000, 0x8bcdd95000000000, + 0x48e04de300000000, 0x09fbc18d00000000, 0x8fbd145f00000000, + 0xcea6983100000000, 0x0d8b0c8200000000, 0x4c9080ec00000000, + 0xbffd4a1e00000000, 0xfee6c67000000000, 0x3dcb52c300000000, + 0x7cd0dead00000000, 0xfa960b7f00000000, 0xbb8d871100000000, + 0x78a013a200000000, 0x39bb9fcc00000000, 0x352bc8dc00000000, + 0x743044b200000000, 0xb71dd00100000000, 0xf6065c6f00000000, + 0x704089bd00000000, 0x315b05d300000000, 0xf276916000000000, + 0xb36d1d0e00000000, 0xea563e4000000000, 0xab4db22e00000000, + 0x6860269d00000000, 0x297baaf300000000, 0xaf3d7f2100000000, + 0xee26f34f00000000, 0x2d0b67fc00000000, 0x6c10eb9200000000, + 0x6080bc8200000000, 0x219b30ec00000000, 0xe2b6a45f00000000, + 0xa3ad283100000000, 0x25ebfde300000000, 0x64f0718d00000000, + 0xa7dde53e00000000, 0xe6c6695000000000, 0x6b50369e00000000, + 0x2a4bbaf000000000, 0xe9662e4300000000, 0xa87da22d00000000, + 0x2e3b77ff00000000, 0x6f20fb9100000000, 0xac0d6f2200000000, + 0xed16e34c00000000, 0xe186b45c00000000, 0xa09d383200000000, + 0x63b0ac8100000000, 0x22ab20ef00000000, 0xa4edf53d00000000, + 0xe5f6795300000000, 0x26dbede000000000, 0x67c0618e00000000, + 0x3efb42c000000000, 0x7fe0ceae00000000, 0xbccd5a1d00000000, + 0xfdd6d67300000000, 0x7b9003a100000000, 0x3a8b8fcf00000000, + 0xf9a61b7c00000000, 0xb8bd971200000000, 0xb42dc00200000000, + 0xf5364c6c00000000, 
0x361bd8df00000000, 0x770054b100000000, + 0xf146816300000000, 0xb05d0d0d00000000, 0x737099be00000000, + 0x326b15d000000000, 0xc106df2200000000, 0x801d534c00000000, + 0x4330c7ff00000000, 0x022b4b9100000000, 0x846d9e4300000000, + 0xc576122d00000000, 0x065b869e00000000, 0x47400af000000000, + 0x4bd05de000000000, 0x0acbd18e00000000, 0xc9e6453d00000000, + 0x88fdc95300000000, 0x0ebb1c8100000000, 0x4fa090ef00000000, + 0x8c8d045c00000000, 0xcd96883200000000, 0x94adab7c00000000, + 0xd5b6271200000000, 0x169bb3a100000000, 0x57803fcf00000000, + 0xd1c6ea1d00000000, 0x90dd667300000000, 0x53f0f2c000000000, + 0x12eb7eae00000000, 0x1e7b29be00000000, 0x5f60a5d000000000, + 0x9c4d316300000000, 0xdd56bd0d00000000, 0x5b1068df00000000, + 0x1a0be4b100000000, 0xd926700200000000, 0x983dfc6c00000000, + 0x7efb953c00000000, 0x3fe0195200000000, 0xfccd8de100000000, + 0xbdd6018f00000000, 0x3b90d45d00000000, 0x7a8b583300000000, + 0xb9a6cc8000000000, 0xf8bd40ee00000000, 0xf42d17fe00000000, + 0xb5369b9000000000, 0x761b0f2300000000, 0x3700834d00000000, + 0xb146569f00000000, 0xf05ddaf100000000, 0x33704e4200000000, + 0x726bc22c00000000, 0x2b50e16200000000, 0x6a4b6d0c00000000, + 0xa966f9bf00000000, 0xe87d75d100000000, 0x6e3ba00300000000, + 0x2f202c6d00000000, 0xec0db8de00000000, 0xad1634b000000000, + 0xa18663a000000000, 0xe09defce00000000, 0x23b07b7d00000000, + 0x62abf71300000000, 0xe4ed22c100000000, 0xa5f6aeaf00000000, + 0x66db3a1c00000000, 0x27c0b67200000000, 0xd4ad7c8000000000, + 0x95b6f0ee00000000, 0x569b645d00000000, 0x1780e83300000000, + 0x91c63de100000000, 0xd0ddb18f00000000, 0x13f0253c00000000, + 0x52eba95200000000, 0x5e7bfe4200000000, 0x1f60722c00000000, + 0xdc4de69f00000000, 0x9d566af100000000, 0x1b10bf2300000000, + 0x5a0b334d00000000, 0x9926a7fe00000000, 0xd83d2b9000000000, + 0x810608de00000000, 0xc01d84b000000000, 0x0330100300000000, + 0x422b9c6d00000000, 0xc46d49bf00000000, 0x8576c5d100000000, + 0x465b516200000000, 0x0740dd0c00000000, 0x0bd08a1c00000000, + 0x4acb067200000000, 0x89e692c100000000, 0xc8fd1eaf00000000, + 0x4ebbcb7d00000000, 0x0fa0471300000000, 0xcc8dd3a000000000, + 0x8d965fce00000000}, + {0x0000000000000000, 0x1dfdb50100000000, 0x3afa6b0300000000, + 0x2707de0200000000, 0x74f4d70600000000, 0x6909620700000000, + 0x4e0ebc0500000000, 0x53f3090400000000, 0xe8e8af0d00000000, + 0xf5151a0c00000000, 0xd212c40e00000000, 0xcfef710f00000000, + 0x9c1c780b00000000, 0x81e1cd0a00000000, 0xa6e6130800000000, + 0xbb1ba60900000000, 0xd0d15f1b00000000, 0xcd2cea1a00000000, + 0xea2b341800000000, 0xf7d6811900000000, 0xa425881d00000000, + 0xb9d83d1c00000000, 0x9edfe31e00000000, 0x8322561f00000000, + 0x3839f01600000000, 0x25c4451700000000, 0x02c39b1500000000, + 0x1f3e2e1400000000, 0x4ccd271000000000, 0x5130921100000000, + 0x76374c1300000000, 0x6bcaf91200000000, 0xa0a3bf3600000000, + 0xbd5e0a3700000000, 0x9a59d43500000000, 0x87a4613400000000, + 0xd457683000000000, 0xc9aadd3100000000, 0xeead033300000000, + 0xf350b63200000000, 0x484b103b00000000, 0x55b6a53a00000000, + 0x72b17b3800000000, 0x6f4cce3900000000, 0x3cbfc73d00000000, + 0x2142723c00000000, 0x0645ac3e00000000, 0x1bb8193f00000000, + 0x7072e02d00000000, 0x6d8f552c00000000, 0x4a888b2e00000000, + 0x57753e2f00000000, 0x0486372b00000000, 0x197b822a00000000, + 0x3e7c5c2800000000, 0x2381e92900000000, 0x989a4f2000000000, + 0x8567fa2100000000, 0xa260242300000000, 0xbf9d912200000000, + 0xec6e982600000000, 0xf1932d2700000000, 0xd694f32500000000, + 0xcb69462400000000, 0x40477f6d00000000, 0x5dbaca6c00000000, + 0x7abd146e00000000, 0x6740a16f00000000, 0x34b3a86b00000000, + 
0x294e1d6a00000000, 0x0e49c36800000000, 0x13b4766900000000, + 0xa8afd06000000000, 0xb552656100000000, 0x9255bb6300000000, + 0x8fa80e6200000000, 0xdc5b076600000000, 0xc1a6b26700000000, + 0xe6a16c6500000000, 0xfb5cd96400000000, 0x9096207600000000, + 0x8d6b957700000000, 0xaa6c4b7500000000, 0xb791fe7400000000, + 0xe462f77000000000, 0xf99f427100000000, 0xde989c7300000000, + 0xc365297200000000, 0x787e8f7b00000000, 0x65833a7a00000000, + 0x4284e47800000000, 0x5f79517900000000, 0x0c8a587d00000000, + 0x1177ed7c00000000, 0x3670337e00000000, 0x2b8d867f00000000, + 0xe0e4c05b00000000, 0xfd19755a00000000, 0xda1eab5800000000, + 0xc7e31e5900000000, 0x9410175d00000000, 0x89eda25c00000000, + 0xaeea7c5e00000000, 0xb317c95f00000000, 0x080c6f5600000000, + 0x15f1da5700000000, 0x32f6045500000000, 0x2f0bb15400000000, + 0x7cf8b85000000000, 0x61050d5100000000, 0x4602d35300000000, + 0x5bff665200000000, 0x30359f4000000000, 0x2dc82a4100000000, + 0x0acff44300000000, 0x1732414200000000, 0x44c1484600000000, + 0x593cfd4700000000, 0x7e3b234500000000, 0x63c6964400000000, + 0xd8dd304d00000000, 0xc520854c00000000, 0xe2275b4e00000000, + 0xffdaee4f00000000, 0xac29e74b00000000, 0xb1d4524a00000000, + 0x96d38c4800000000, 0x8b2e394900000000, 0x808efeda00000000, + 0x9d734bdb00000000, 0xba7495d900000000, 0xa78920d800000000, + 0xf47a29dc00000000, 0xe9879cdd00000000, 0xce8042df00000000, + 0xd37df7de00000000, 0x686651d700000000, 0x759be4d600000000, + 0x529c3ad400000000, 0x4f618fd500000000, 0x1c9286d100000000, + 0x016f33d000000000, 0x2668edd200000000, 0x3b9558d300000000, + 0x505fa1c100000000, 0x4da214c000000000, 0x6aa5cac200000000, + 0x77587fc300000000, 0x24ab76c700000000, 0x3956c3c600000000, + 0x1e511dc400000000, 0x03aca8c500000000, 0xb8b70ecc00000000, + 0xa54abbcd00000000, 0x824d65cf00000000, 0x9fb0d0ce00000000, + 0xcc43d9ca00000000, 0xd1be6ccb00000000, 0xf6b9b2c900000000, + 0xeb4407c800000000, 0x202d41ec00000000, 0x3dd0f4ed00000000, + 0x1ad72aef00000000, 0x072a9fee00000000, 0x54d996ea00000000, + 0x492423eb00000000, 0x6e23fde900000000, 0x73de48e800000000, + 0xc8c5eee100000000, 0xd5385be000000000, 0xf23f85e200000000, + 0xefc230e300000000, 0xbc3139e700000000, 0xa1cc8ce600000000, + 0x86cb52e400000000, 0x9b36e7e500000000, 0xf0fc1ef700000000, + 0xed01abf600000000, 0xca0675f400000000, 0xd7fbc0f500000000, + 0x8408c9f100000000, 0x99f57cf000000000, 0xbef2a2f200000000, + 0xa30f17f300000000, 0x1814b1fa00000000, 0x05e904fb00000000, + 0x22eedaf900000000, 0x3f136ff800000000, 0x6ce066fc00000000, + 0x711dd3fd00000000, 0x561a0dff00000000, 0x4be7b8fe00000000, + 0xc0c981b700000000, 0xdd3434b600000000, 0xfa33eab400000000, + 0xe7ce5fb500000000, 0xb43d56b100000000, 0xa9c0e3b000000000, + 0x8ec73db200000000, 0x933a88b300000000, 0x28212eba00000000, + 0x35dc9bbb00000000, 0x12db45b900000000, 0x0f26f0b800000000, + 0x5cd5f9bc00000000, 0x41284cbd00000000, 0x662f92bf00000000, + 0x7bd227be00000000, 0x1018deac00000000, 0x0de56bad00000000, + 0x2ae2b5af00000000, 0x371f00ae00000000, 0x64ec09aa00000000, + 0x7911bcab00000000, 0x5e1662a900000000, 0x43ebd7a800000000, + 0xf8f071a100000000, 0xe50dc4a000000000, 0xc20a1aa200000000, + 0xdff7afa300000000, 0x8c04a6a700000000, 0x91f913a600000000, + 0xb6fecda400000000, 0xab0378a500000000, 0x606a3e8100000000, + 0x7d978b8000000000, 0x5a90558200000000, 0x476de08300000000, + 0x149ee98700000000, 0x09635c8600000000, 0x2e64828400000000, + 0x3399378500000000, 0x8882918c00000000, 0x957f248d00000000, + 0xb278fa8f00000000, 0xaf854f8e00000000, 0xfc76468a00000000, + 0xe18bf38b00000000, 0xc68c2d8900000000, 0xdb71988800000000, + 0xb0bb619a00000000, 
0xad46d49b00000000, 0x8a410a9900000000, + 0x97bcbf9800000000, 0xc44fb69c00000000, 0xd9b2039d00000000, + 0xfeb5dd9f00000000, 0xe348689e00000000, 0x5853ce9700000000, + 0x45ae7b9600000000, 0x62a9a59400000000, 0x7f54109500000000, + 0x2ca7199100000000, 0x315aac9000000000, 0x165d729200000000, + 0x0ba0c79300000000}, + {0x0000000000000000, 0x24d9076300000000, 0x48b20fc600000000, + 0x6c6b08a500000000, 0xd1626e5700000000, 0xf5bb693400000000, + 0x99d0619100000000, 0xbd0966f200000000, 0xa2c5dcae00000000, + 0x861cdbcd00000000, 0xea77d36800000000, 0xceaed40b00000000, + 0x73a7b2f900000000, 0x577eb59a00000000, 0x3b15bd3f00000000, + 0x1fccba5c00000000, 0x058dc88600000000, 0x2154cfe500000000, + 0x4d3fc74000000000, 0x69e6c02300000000, 0xd4efa6d100000000, + 0xf036a1b200000000, 0x9c5da91700000000, 0xb884ae7400000000, + 0xa748142800000000, 0x8391134b00000000, 0xeffa1bee00000000, + 0xcb231c8d00000000, 0x762a7a7f00000000, 0x52f37d1c00000000, + 0x3e9875b900000000, 0x1a4172da00000000, 0x4b1ce0d600000000, + 0x6fc5e7b500000000, 0x03aeef1000000000, 0x2777e87300000000, + 0x9a7e8e8100000000, 0xbea789e200000000, 0xd2cc814700000000, + 0xf615862400000000, 0xe9d93c7800000000, 0xcd003b1b00000000, + 0xa16b33be00000000, 0x85b234dd00000000, 0x38bb522f00000000, + 0x1c62554c00000000, 0x70095de900000000, 0x54d05a8a00000000, + 0x4e91285000000000, 0x6a482f3300000000, 0x0623279600000000, + 0x22fa20f500000000, 0x9ff3460700000000, 0xbb2a416400000000, + 0xd74149c100000000, 0xf3984ea200000000, 0xec54f4fe00000000, + 0xc88df39d00000000, 0xa4e6fb3800000000, 0x803ffc5b00000000, + 0x3d369aa900000000, 0x19ef9dca00000000, 0x7584956f00000000, + 0x515d920c00000000, 0xd73eb17600000000, 0xf3e7b61500000000, + 0x9f8cbeb000000000, 0xbb55b9d300000000, 0x065cdf2100000000, + 0x2285d84200000000, 0x4eeed0e700000000, 0x6a37d78400000000, + 0x75fb6dd800000000, 0x51226abb00000000, 0x3d49621e00000000, + 0x1990657d00000000, 0xa499038f00000000, 0x804004ec00000000, + 0xec2b0c4900000000, 0xc8f20b2a00000000, 0xd2b379f000000000, + 0xf66a7e9300000000, 0x9a01763600000000, 0xbed8715500000000, + 0x03d117a700000000, 0x270810c400000000, 0x4b63186100000000, + 0x6fba1f0200000000, 0x7076a55e00000000, 0x54afa23d00000000, + 0x38c4aa9800000000, 0x1c1dadfb00000000, 0xa114cb0900000000, + 0x85cdcc6a00000000, 0xe9a6c4cf00000000, 0xcd7fc3ac00000000, + 0x9c2251a000000000, 0xb8fb56c300000000, 0xd4905e6600000000, + 0xf049590500000000, 0x4d403ff700000000, 0x6999389400000000, + 0x05f2303100000000, 0x212b375200000000, 0x3ee78d0e00000000, + 0x1a3e8a6d00000000, 0x765582c800000000, 0x528c85ab00000000, + 0xef85e35900000000, 0xcb5ce43a00000000, 0xa737ec9f00000000, + 0x83eeebfc00000000, 0x99af992600000000, 0xbd769e4500000000, + 0xd11d96e000000000, 0xf5c4918300000000, 0x48cdf77100000000, + 0x6c14f01200000000, 0x007ff8b700000000, 0x24a6ffd400000000, + 0x3b6a458800000000, 0x1fb342eb00000000, 0x73d84a4e00000000, + 0x57014d2d00000000, 0xea082bdf00000000, 0xced12cbc00000000, + 0xa2ba241900000000, 0x8663237a00000000, 0xae7d62ed00000000, + 0x8aa4658e00000000, 0xe6cf6d2b00000000, 0xc2166a4800000000, + 0x7f1f0cba00000000, 0x5bc60bd900000000, 0x37ad037c00000000, + 0x1374041f00000000, 0x0cb8be4300000000, 0x2861b92000000000, + 0x440ab18500000000, 0x60d3b6e600000000, 0xdddad01400000000, + 0xf903d77700000000, 0x9568dfd200000000, 0xb1b1d8b100000000, + 0xabf0aa6b00000000, 0x8f29ad0800000000, 0xe342a5ad00000000, + 0xc79ba2ce00000000, 0x7a92c43c00000000, 0x5e4bc35f00000000, + 0x3220cbfa00000000, 0x16f9cc9900000000, 0x093576c500000000, + 0x2dec71a600000000, 0x4187790300000000, 0x655e7e6000000000, + 
0xd857189200000000, 0xfc8e1ff100000000, 0x90e5175400000000, + 0xb43c103700000000, 0xe561823b00000000, 0xc1b8855800000000, + 0xadd38dfd00000000, 0x890a8a9e00000000, 0x3403ec6c00000000, + 0x10daeb0f00000000, 0x7cb1e3aa00000000, 0x5868e4c900000000, + 0x47a45e9500000000, 0x637d59f600000000, 0x0f16515300000000, + 0x2bcf563000000000, 0x96c630c200000000, 0xb21f37a100000000, + 0xde743f0400000000, 0xfaad386700000000, 0xe0ec4abd00000000, + 0xc4354dde00000000, 0xa85e457b00000000, 0x8c87421800000000, + 0x318e24ea00000000, 0x1557238900000000, 0x793c2b2c00000000, + 0x5de52c4f00000000, 0x4229961300000000, 0x66f0917000000000, + 0x0a9b99d500000000, 0x2e429eb600000000, 0x934bf84400000000, + 0xb792ff2700000000, 0xdbf9f78200000000, 0xff20f0e100000000, + 0x7943d39b00000000, 0x5d9ad4f800000000, 0x31f1dc5d00000000, + 0x1528db3e00000000, 0xa821bdcc00000000, 0x8cf8baaf00000000, + 0xe093b20a00000000, 0xc44ab56900000000, 0xdb860f3500000000, + 0xff5f085600000000, 0x933400f300000000, 0xb7ed079000000000, + 0x0ae4616200000000, 0x2e3d660100000000, 0x42566ea400000000, + 0x668f69c700000000, 0x7cce1b1d00000000, 0x58171c7e00000000, + 0x347c14db00000000, 0x10a513b800000000, 0xadac754a00000000, + 0x8975722900000000, 0xe51e7a8c00000000, 0xc1c77def00000000, + 0xde0bc7b300000000, 0xfad2c0d000000000, 0x96b9c87500000000, + 0xb260cf1600000000, 0x0f69a9e400000000, 0x2bb0ae8700000000, + 0x47dba62200000000, 0x6302a14100000000, 0x325f334d00000000, + 0x1686342e00000000, 0x7aed3c8b00000000, 0x5e343be800000000, + 0xe33d5d1a00000000, 0xc7e45a7900000000, 0xab8f52dc00000000, + 0x8f5655bf00000000, 0x909aefe300000000, 0xb443e88000000000, + 0xd828e02500000000, 0xfcf1e74600000000, 0x41f881b400000000, + 0x652186d700000000, 0x094a8e7200000000, 0x2d93891100000000, + 0x37d2fbcb00000000, 0x130bfca800000000, 0x7f60f40d00000000, + 0x5bb9f36e00000000, 0xe6b0959c00000000, 0xc26992ff00000000, + 0xae029a5a00000000, 0x8adb9d3900000000, 0x9517276500000000, + 0xb1ce200600000000, 0xdda528a300000000, 0xf97c2fc000000000, + 0x4475493200000000, 0x60ac4e5100000000, 0x0cc746f400000000, + 0x281e419700000000}, + {0x0000000000000000, 0x08e3603c00000000, 0x10c6c17800000000, + 0x1825a14400000000, 0x208c83f100000000, 0x286fe3cd00000000, + 0x304a428900000000, 0x38a922b500000000, 0x011e763800000000, + 0x09fd160400000000, 0x11d8b74000000000, 0x193bd77c00000000, + 0x2192f5c900000000, 0x297195f500000000, 0x315434b100000000, + 0x39b7548d00000000, 0x023cec7000000000, 0x0adf8c4c00000000, + 0x12fa2d0800000000, 0x1a194d3400000000, 0x22b06f8100000000, + 0x2a530fbd00000000, 0x3276aef900000000, 0x3a95cec500000000, + 0x03229a4800000000, 0x0bc1fa7400000000, 0x13e45b3000000000, + 0x1b073b0c00000000, 0x23ae19b900000000, 0x2b4d798500000000, + 0x3368d8c100000000, 0x3b8bb8fd00000000, 0x0478d8e100000000, + 0x0c9bb8dd00000000, 0x14be199900000000, 0x1c5d79a500000000, + 0x24f45b1000000000, 0x2c173b2c00000000, 0x34329a6800000000, + 0x3cd1fa5400000000, 0x0566aed900000000, 0x0d85cee500000000, + 0x15a06fa100000000, 0x1d430f9d00000000, 0x25ea2d2800000000, + 0x2d094d1400000000, 0x352cec5000000000, 0x3dcf8c6c00000000, + 0x0644349100000000, 0x0ea754ad00000000, 0x1682f5e900000000, + 0x1e6195d500000000, 0x26c8b76000000000, 0x2e2bd75c00000000, + 0x360e761800000000, 0x3eed162400000000, 0x075a42a900000000, + 0x0fb9229500000000, 0x179c83d100000000, 0x1f7fe3ed00000000, + 0x27d6c15800000000, 0x2f35a16400000000, 0x3710002000000000, + 0x3ff3601c00000000, 0x49f6c11800000000, 0x4115a12400000000, + 0x5930006000000000, 0x51d3605c00000000, 0x697a42e900000000, + 0x619922d500000000, 0x79bc839100000000, 
0x715fe3ad00000000, + 0x48e8b72000000000, 0x400bd71c00000000, 0x582e765800000000, + 0x50cd166400000000, 0x686434d100000000, 0x608754ed00000000, + 0x78a2f5a900000000, 0x7041959500000000, 0x4bca2d6800000000, + 0x43294d5400000000, 0x5b0cec1000000000, 0x53ef8c2c00000000, + 0x6b46ae9900000000, 0x63a5cea500000000, 0x7b806fe100000000, + 0x73630fdd00000000, 0x4ad45b5000000000, 0x42373b6c00000000, + 0x5a129a2800000000, 0x52f1fa1400000000, 0x6a58d8a100000000, + 0x62bbb89d00000000, 0x7a9e19d900000000, 0x727d79e500000000, + 0x4d8e19f900000000, 0x456d79c500000000, 0x5d48d88100000000, + 0x55abb8bd00000000, 0x6d029a0800000000, 0x65e1fa3400000000, + 0x7dc45b7000000000, 0x75273b4c00000000, 0x4c906fc100000000, + 0x44730ffd00000000, 0x5c56aeb900000000, 0x54b5ce8500000000, + 0x6c1cec3000000000, 0x64ff8c0c00000000, 0x7cda2d4800000000, + 0x74394d7400000000, 0x4fb2f58900000000, 0x475195b500000000, + 0x5f7434f100000000, 0x579754cd00000000, 0x6f3e767800000000, + 0x67dd164400000000, 0x7ff8b70000000000, 0x771bd73c00000000, + 0x4eac83b100000000, 0x464fe38d00000000, 0x5e6a42c900000000, + 0x568922f500000000, 0x6e20004000000000, 0x66c3607c00000000, + 0x7ee6c13800000000, 0x7605a10400000000, 0x92ec833100000000, + 0x9a0fe30d00000000, 0x822a424900000000, 0x8ac9227500000000, + 0xb26000c000000000, 0xba8360fc00000000, 0xa2a6c1b800000000, + 0xaa45a18400000000, 0x93f2f50900000000, 0x9b11953500000000, + 0x8334347100000000, 0x8bd7544d00000000, 0xb37e76f800000000, + 0xbb9d16c400000000, 0xa3b8b78000000000, 0xab5bd7bc00000000, + 0x90d06f4100000000, 0x98330f7d00000000, 0x8016ae3900000000, + 0x88f5ce0500000000, 0xb05cecb000000000, 0xb8bf8c8c00000000, + 0xa09a2dc800000000, 0xa8794df400000000, 0x91ce197900000000, + 0x992d794500000000, 0x8108d80100000000, 0x89ebb83d00000000, + 0xb1429a8800000000, 0xb9a1fab400000000, 0xa1845bf000000000, + 0xa9673bcc00000000, 0x96945bd000000000, 0x9e773bec00000000, + 0x86529aa800000000, 0x8eb1fa9400000000, 0xb618d82100000000, + 0xbefbb81d00000000, 0xa6de195900000000, 0xae3d796500000000, + 0x978a2de800000000, 0x9f694dd400000000, 0x874cec9000000000, + 0x8faf8cac00000000, 0xb706ae1900000000, 0xbfe5ce2500000000, + 0xa7c06f6100000000, 0xaf230f5d00000000, 0x94a8b7a000000000, + 0x9c4bd79c00000000, 0x846e76d800000000, 0x8c8d16e400000000, + 0xb424345100000000, 0xbcc7546d00000000, 0xa4e2f52900000000, + 0xac01951500000000, 0x95b6c19800000000, 0x9d55a1a400000000, + 0x857000e000000000, 0x8d9360dc00000000, 0xb53a426900000000, + 0xbdd9225500000000, 0xa5fc831100000000, 0xad1fe32d00000000, + 0xdb1a422900000000, 0xd3f9221500000000, 0xcbdc835100000000, + 0xc33fe36d00000000, 0xfb96c1d800000000, 0xf375a1e400000000, + 0xeb5000a000000000, 0xe3b3609c00000000, 0xda04341100000000, + 0xd2e7542d00000000, 0xcac2f56900000000, 0xc221955500000000, + 0xfa88b7e000000000, 0xf26bd7dc00000000, 0xea4e769800000000, + 0xe2ad16a400000000, 0xd926ae5900000000, 0xd1c5ce6500000000, + 0xc9e06f2100000000, 0xc1030f1d00000000, 0xf9aa2da800000000, + 0xf1494d9400000000, 0xe96cecd000000000, 0xe18f8cec00000000, + 0xd838d86100000000, 0xd0dbb85d00000000, 0xc8fe191900000000, + 0xc01d792500000000, 0xf8b45b9000000000, 0xf0573bac00000000, + 0xe8729ae800000000, 0xe091fad400000000, 0xdf629ac800000000, + 0xd781faf400000000, 0xcfa45bb000000000, 0xc7473b8c00000000, + 0xffee193900000000, 0xf70d790500000000, 0xef28d84100000000, + 0xe7cbb87d00000000, 0xde7cecf000000000, 0xd69f8ccc00000000, + 0xceba2d8800000000, 0xc6594db400000000, 0xfef06f0100000000, + 0xf6130f3d00000000, 0xee36ae7900000000, 0xe6d5ce4500000000, + 0xdd5e76b800000000, 0xd5bd168400000000, 0xcd98b7c000000000, 
+ 0xc57bd7fc00000000, 0xfdd2f54900000000, 0xf531957500000000, + 0xed14343100000000, 0xe5f7540d00000000, 0xdc40008000000000, + 0xd4a360bc00000000, 0xcc86c1f800000000, 0xc465a1c400000000, + 0xfccc837100000000, 0xf42fe34d00000000, 0xec0a420900000000, + 0xe4e9223500000000}, + {0x0000000000000000, 0xd1e8e70e00000000, 0xa2d1cf1d00000000, + 0x7339281300000000, 0x44a39f3b00000000, 0x954b783500000000, + 0xe672502600000000, 0x379ab72800000000, 0x88463f7700000000, + 0x59aed87900000000, 0x2a97f06a00000000, 0xfb7f176400000000, + 0xcce5a04c00000000, 0x1d0d474200000000, 0x6e346f5100000000, + 0xbfdc885f00000000, 0x108d7eee00000000, 0xc16599e000000000, + 0xb25cb1f300000000, 0x63b456fd00000000, 0x542ee1d500000000, + 0x85c606db00000000, 0xf6ff2ec800000000, 0x2717c9c600000000, + 0x98cb419900000000, 0x4923a69700000000, 0x3a1a8e8400000000, + 0xebf2698a00000000, 0xdc68dea200000000, 0x0d8039ac00000000, + 0x7eb911bf00000000, 0xaf51f6b100000000, 0x611c8c0700000000, + 0xb0f46b0900000000, 0xc3cd431a00000000, 0x1225a41400000000, + 0x25bf133c00000000, 0xf457f43200000000, 0x876edc2100000000, + 0x56863b2f00000000, 0xe95ab37000000000, 0x38b2547e00000000, + 0x4b8b7c6d00000000, 0x9a639b6300000000, 0xadf92c4b00000000, + 0x7c11cb4500000000, 0x0f28e35600000000, 0xdec0045800000000, + 0x7191f2e900000000, 0xa07915e700000000, 0xd3403df400000000, + 0x02a8dafa00000000, 0x35326dd200000000, 0xe4da8adc00000000, + 0x97e3a2cf00000000, 0x460b45c100000000, 0xf9d7cd9e00000000, + 0x283f2a9000000000, 0x5b06028300000000, 0x8aeee58d00000000, + 0xbd7452a500000000, 0x6c9cb5ab00000000, 0x1fa59db800000000, + 0xce4d7ab600000000, 0xc238180f00000000, 0x13d0ff0100000000, + 0x60e9d71200000000, 0xb101301c00000000, 0x869b873400000000, + 0x5773603a00000000, 0x244a482900000000, 0xf5a2af2700000000, + 0x4a7e277800000000, 0x9b96c07600000000, 0xe8afe86500000000, + 0x39470f6b00000000, 0x0eddb84300000000, 0xdf355f4d00000000, + 0xac0c775e00000000, 0x7de4905000000000, 0xd2b566e100000000, + 0x035d81ef00000000, 0x7064a9fc00000000, 0xa18c4ef200000000, + 0x9616f9da00000000, 0x47fe1ed400000000, 0x34c736c700000000, + 0xe52fd1c900000000, 0x5af3599600000000, 0x8b1bbe9800000000, + 0xf822968b00000000, 0x29ca718500000000, 0x1e50c6ad00000000, + 0xcfb821a300000000, 0xbc8109b000000000, 0x6d69eebe00000000, + 0xa324940800000000, 0x72cc730600000000, 0x01f55b1500000000, + 0xd01dbc1b00000000, 0xe7870b3300000000, 0x366fec3d00000000, + 0x4556c42e00000000, 0x94be232000000000, 0x2b62ab7f00000000, + 0xfa8a4c7100000000, 0x89b3646200000000, 0x585b836c00000000, + 0x6fc1344400000000, 0xbe29d34a00000000, 0xcd10fb5900000000, + 0x1cf81c5700000000, 0xb3a9eae600000000, 0x62410de800000000, + 0x117825fb00000000, 0xc090c2f500000000, 0xf70a75dd00000000, + 0x26e292d300000000, 0x55dbbac000000000, 0x84335dce00000000, + 0x3befd59100000000, 0xea07329f00000000, 0x993e1a8c00000000, + 0x48d6fd8200000000, 0x7f4c4aaa00000000, 0xaea4ada400000000, + 0xdd9d85b700000000, 0x0c7562b900000000, 0x8471301e00000000, + 0x5599d71000000000, 0x26a0ff0300000000, 0xf748180d00000000, + 0xc0d2af2500000000, 0x113a482b00000000, 0x6203603800000000, + 0xb3eb873600000000, 0x0c370f6900000000, 0xdddfe86700000000, + 0xaee6c07400000000, 0x7f0e277a00000000, 0x4894905200000000, + 0x997c775c00000000, 0xea455f4f00000000, 0x3badb84100000000, + 0x94fc4ef000000000, 0x4514a9fe00000000, 0x362d81ed00000000, + 0xe7c566e300000000, 0xd05fd1cb00000000, 0x01b736c500000000, + 0x728e1ed600000000, 0xa366f9d800000000, 0x1cba718700000000, + 0xcd52968900000000, 0xbe6bbe9a00000000, 0x6f83599400000000, + 0x5819eebc00000000, 0x89f109b200000000, 
0xfac821a100000000, + 0x2b20c6af00000000, 0xe56dbc1900000000, 0x34855b1700000000, + 0x47bc730400000000, 0x9654940a00000000, 0xa1ce232200000000, + 0x7026c42c00000000, 0x031fec3f00000000, 0xd2f70b3100000000, + 0x6d2b836e00000000, 0xbcc3646000000000, 0xcffa4c7300000000, + 0x1e12ab7d00000000, 0x29881c5500000000, 0xf860fb5b00000000, + 0x8b59d34800000000, 0x5ab1344600000000, 0xf5e0c2f700000000, + 0x240825f900000000, 0x57310dea00000000, 0x86d9eae400000000, + 0xb1435dcc00000000, 0x60abbac200000000, 0x139292d100000000, + 0xc27a75df00000000, 0x7da6fd8000000000, 0xac4e1a8e00000000, + 0xdf77329d00000000, 0x0e9fd59300000000, 0x390562bb00000000, + 0xe8ed85b500000000, 0x9bd4ada600000000, 0x4a3c4aa800000000, + 0x4649281100000000, 0x97a1cf1f00000000, 0xe498e70c00000000, + 0x3570000200000000, 0x02eab72a00000000, 0xd302502400000000, + 0xa03b783700000000, 0x71d39f3900000000, 0xce0f176600000000, + 0x1fe7f06800000000, 0x6cded87b00000000, 0xbd363f7500000000, + 0x8aac885d00000000, 0x5b446f5300000000, 0x287d474000000000, + 0xf995a04e00000000, 0x56c456ff00000000, 0x872cb1f100000000, + 0xf41599e200000000, 0x25fd7eec00000000, 0x1267c9c400000000, + 0xc38f2eca00000000, 0xb0b606d900000000, 0x615ee1d700000000, + 0xde82698800000000, 0x0f6a8e8600000000, 0x7c53a69500000000, + 0xadbb419b00000000, 0x9a21f6b300000000, 0x4bc911bd00000000, + 0x38f039ae00000000, 0xe918dea000000000, 0x2755a41600000000, + 0xf6bd431800000000, 0x85846b0b00000000, 0x546c8c0500000000, + 0x63f63b2d00000000, 0xb21edc2300000000, 0xc127f43000000000, + 0x10cf133e00000000, 0xaf139b6100000000, 0x7efb7c6f00000000, + 0x0dc2547c00000000, 0xdc2ab37200000000, 0xebb0045a00000000, + 0x3a58e35400000000, 0x4961cb4700000000, 0x98892c4900000000, + 0x37d8daf800000000, 0xe6303df600000000, 0x950915e500000000, + 0x44e1f2eb00000000, 0x737b45c300000000, 0xa293a2cd00000000, + 0xd1aa8ade00000000, 0x00426dd000000000, 0xbf9ee58f00000000, + 0x6e76028100000000, 0x1d4f2a9200000000, 0xcca7cd9c00000000, + 0xfb3d7ab400000000, 0x2ad59dba00000000, 0x59ecb5a900000000, + 0x880452a700000000}, + {0x0000000000000000, 0xaa05daf100000000, 0x150dc53800000000, + 0xbf081fc900000000, 0x2a1a8a7100000000, 0x801f508000000000, + 0x3f174f4900000000, 0x951295b800000000, 0x543414e300000000, + 0xfe31ce1200000000, 0x4139d1db00000000, 0xeb3c0b2a00000000, + 0x7e2e9e9200000000, 0xd42b446300000000, 0x6b235baa00000000, + 0xc126815b00000000, 0xe96e591d00000000, 0x436b83ec00000000, + 0xfc639c2500000000, 0x566646d400000000, 0xc374d36c00000000, + 0x6971099d00000000, 0xd679165400000000, 0x7c7ccca500000000, + 0xbd5a4dfe00000000, 0x175f970f00000000, 0xa85788c600000000, + 0x0252523700000000, 0x9740c78f00000000, 0x3d451d7e00000000, + 0x824d02b700000000, 0x2848d84600000000, 0xd2ddb23a00000000, + 0x78d868cb00000000, 0xc7d0770200000000, 0x6dd5adf300000000, + 0xf8c7384b00000000, 0x52c2e2ba00000000, 0xedcafd7300000000, + 0x47cf278200000000, 0x86e9a6d900000000, 0x2cec7c2800000000, + 0x93e463e100000000, 0x39e1b91000000000, 0xacf32ca800000000, + 0x06f6f65900000000, 0xb9fee99000000000, 0x13fb336100000000, + 0x3bb3eb2700000000, 0x91b631d600000000, 0x2ebe2e1f00000000, + 0x84bbf4ee00000000, 0x11a9615600000000, 0xbbacbba700000000, + 0x04a4a46e00000000, 0xaea17e9f00000000, 0x6f87ffc400000000, + 0xc582253500000000, 0x7a8a3afc00000000, 0xd08fe00d00000000, + 0x459d75b500000000, 0xef98af4400000000, 0x5090b08d00000000, + 0xfa956a7c00000000, 0xa4bb657500000000, 0x0ebebf8400000000, + 0xb1b6a04d00000000, 0x1bb37abc00000000, 0x8ea1ef0400000000, + 0x24a435f500000000, 0x9bac2a3c00000000, 0x31a9f0cd00000000, + 0xf08f719600000000, 
0x5a8aab6700000000, 0xe582b4ae00000000, + 0x4f876e5f00000000, 0xda95fbe700000000, 0x7090211600000000, + 0xcf983edf00000000, 0x659de42e00000000, 0x4dd53c6800000000, + 0xe7d0e69900000000, 0x58d8f95000000000, 0xf2dd23a100000000, + 0x67cfb61900000000, 0xcdca6ce800000000, 0x72c2732100000000, + 0xd8c7a9d000000000, 0x19e1288b00000000, 0xb3e4f27a00000000, + 0x0cecedb300000000, 0xa6e9374200000000, 0x33fba2fa00000000, + 0x99fe780b00000000, 0x26f667c200000000, 0x8cf3bd3300000000, + 0x7666d74f00000000, 0xdc630dbe00000000, 0x636b127700000000, + 0xc96ec88600000000, 0x5c7c5d3e00000000, 0xf67987cf00000000, + 0x4971980600000000, 0xe37442f700000000, 0x2252c3ac00000000, + 0x8857195d00000000, 0x375f069400000000, 0x9d5adc6500000000, + 0x084849dd00000000, 0xa24d932c00000000, 0x1d458ce500000000, + 0xb740561400000000, 0x9f088e5200000000, 0x350d54a300000000, + 0x8a054b6a00000000, 0x2000919b00000000, 0xb512042300000000, + 0x1f17ded200000000, 0xa01fc11b00000000, 0x0a1a1bea00000000, + 0xcb3c9ab100000000, 0x6139404000000000, 0xde315f8900000000, + 0x7434857800000000, 0xe12610c000000000, 0x4b23ca3100000000, + 0xf42bd5f800000000, 0x5e2e0f0900000000, 0x4877cbea00000000, + 0xe272111b00000000, 0x5d7a0ed200000000, 0xf77fd42300000000, + 0x626d419b00000000, 0xc8689b6a00000000, 0x776084a300000000, + 0xdd655e5200000000, 0x1c43df0900000000, 0xb64605f800000000, + 0x094e1a3100000000, 0xa34bc0c000000000, 0x3659557800000000, + 0x9c5c8f8900000000, 0x2354904000000000, 0x89514ab100000000, + 0xa11992f700000000, 0x0b1c480600000000, 0xb41457cf00000000, + 0x1e118d3e00000000, 0x8b03188600000000, 0x2106c27700000000, + 0x9e0eddbe00000000, 0x340b074f00000000, 0xf52d861400000000, + 0x5f285ce500000000, 0xe020432c00000000, 0x4a2599dd00000000, + 0xdf370c6500000000, 0x7532d69400000000, 0xca3ac95d00000000, + 0x603f13ac00000000, 0x9aaa79d000000000, 0x30afa32100000000, + 0x8fa7bce800000000, 0x25a2661900000000, 0xb0b0f3a100000000, + 0x1ab5295000000000, 0xa5bd369900000000, 0x0fb8ec6800000000, + 0xce9e6d3300000000, 0x649bb7c200000000, 0xdb93a80b00000000, + 0x719672fa00000000, 0xe484e74200000000, 0x4e813db300000000, + 0xf189227a00000000, 0x5b8cf88b00000000, 0x73c420cd00000000, + 0xd9c1fa3c00000000, 0x66c9e5f500000000, 0xcccc3f0400000000, + 0x59deaabc00000000, 0xf3db704d00000000, 0x4cd36f8400000000, + 0xe6d6b57500000000, 0x27f0342e00000000, 0x8df5eedf00000000, + 0x32fdf11600000000, 0x98f82be700000000, 0x0deabe5f00000000, + 0xa7ef64ae00000000, 0x18e77b6700000000, 0xb2e2a19600000000, + 0xecccae9f00000000, 0x46c9746e00000000, 0xf9c16ba700000000, + 0x53c4b15600000000, 0xc6d624ee00000000, 0x6cd3fe1f00000000, + 0xd3dbe1d600000000, 0x79de3b2700000000, 0xb8f8ba7c00000000, + 0x12fd608d00000000, 0xadf57f4400000000, 0x07f0a5b500000000, + 0x92e2300d00000000, 0x38e7eafc00000000, 0x87eff53500000000, + 0x2dea2fc400000000, 0x05a2f78200000000, 0xafa72d7300000000, + 0x10af32ba00000000, 0xbaaae84b00000000, 0x2fb87df300000000, + 0x85bda70200000000, 0x3ab5b8cb00000000, 0x90b0623a00000000, + 0x5196e36100000000, 0xfb93399000000000, 0x449b265900000000, + 0xee9efca800000000, 0x7b8c691000000000, 0xd189b3e100000000, + 0x6e81ac2800000000, 0xc48476d900000000, 0x3e111ca500000000, + 0x9414c65400000000, 0x2b1cd99d00000000, 0x8119036c00000000, + 0x140b96d400000000, 0xbe0e4c2500000000, 0x010653ec00000000, + 0xab03891d00000000, 0x6a25084600000000, 0xc020d2b700000000, + 0x7f28cd7e00000000, 0xd52d178f00000000, 0x403f823700000000, + 0xea3a58c600000000, 0x5532470f00000000, 0xff379dfe00000000, + 0xd77f45b800000000, 0x7d7a9f4900000000, 0xc272808000000000, + 0x68775a7100000000, 0xfd65cfc900000000, 
0x5760153800000000, + 0xe8680af100000000, 0x426dd00000000000, 0x834b515b00000000, + 0x294e8baa00000000, 0x9646946300000000, 0x3c434e9200000000, + 0xa951db2a00000000, 0x035401db00000000, 0xbc5c1e1200000000, + 0x1659c4e300000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xae689191, 0x87a02563, 0x29c8b4f2, 0xd4314c87, + 0x7a59dd16, 0x539169e4, 0xfdf9f875, 0x73139f4f, 0xdd7b0ede, + 0xf4b3ba2c, 0x5adb2bbd, 0xa722d3c8, 0x094a4259, 0x2082f6ab, + 0x8eea673a, 0xe6273e9e, 0x484faf0f, 0x61871bfd, 0xcfef8a6c, + 0x32167219, 0x9c7ee388, 0xb5b6577a, 0x1bdec6eb, 0x9534a1d1, + 0x3b5c3040, 0x129484b2, 0xbcfc1523, 0x4105ed56, 0xef6d7cc7, + 0xc6a5c835, 0x68cd59a4, 0x173f7b7d, 0xb957eaec, 0x909f5e1e, + 0x3ef7cf8f, 0xc30e37fa, 0x6d66a66b, 0x44ae1299, 0xeac68308, + 0x642ce432, 0xca4475a3, 0xe38cc151, 0x4de450c0, 0xb01da8b5, + 0x1e753924, 0x37bd8dd6, 0x99d51c47, 0xf11845e3, 0x5f70d472, + 0x76b86080, 0xd8d0f111, 0x25290964, 0x8b4198f5, 0xa2892c07, + 0x0ce1bd96, 0x820bdaac, 0x2c634b3d, 0x05abffcf, 0xabc36e5e, + 0x563a962b, 0xf85207ba, 0xd19ab348, 0x7ff222d9, 0x2e7ef6fa, + 0x8016676b, 0xa9ded399, 0x07b64208, 0xfa4fba7d, 0x54272bec, + 0x7def9f1e, 0xd3870e8f, 0x5d6d69b5, 0xf305f824, 0xdacd4cd6, + 0x74a5dd47, 0x895c2532, 0x2734b4a3, 0x0efc0051, 0xa09491c0, + 0xc859c864, 0x663159f5, 0x4ff9ed07, 0xe1917c96, 0x1c6884e3, + 0xb2001572, 0x9bc8a180, 0x35a03011, 0xbb4a572b, 0x1522c6ba, + 0x3cea7248, 0x9282e3d9, 0x6f7b1bac, 0xc1138a3d, 0xe8db3ecf, + 0x46b3af5e, 0x39418d87, 0x97291c16, 0xbee1a8e4, 0x10893975, + 0xed70c100, 0x43185091, 0x6ad0e463, 0xc4b875f2, 0x4a5212c8, + 0xe43a8359, 0xcdf237ab, 0x639aa63a, 0x9e635e4f, 0x300bcfde, + 0x19c37b2c, 0xb7abeabd, 0xdf66b319, 0x710e2288, 0x58c6967a, + 0xf6ae07eb, 0x0b57ff9e, 0xa53f6e0f, 0x8cf7dafd, 0x229f4b6c, + 0xac752c56, 0x021dbdc7, 0x2bd50935, 0x85bd98a4, 0x784460d1, + 0xd62cf140, 0xffe445b2, 0x518cd423, 0x5cfdedf4, 0xf2957c65, + 0xdb5dc897, 0x75355906, 0x88cca173, 0x26a430e2, 0x0f6c8410, + 0xa1041581, 0x2fee72bb, 0x8186e32a, 0xa84e57d8, 0x0626c649, + 0xfbdf3e3c, 0x55b7afad, 0x7c7f1b5f, 0xd2178ace, 0xbadad36a, + 0x14b242fb, 0x3d7af609, 0x93126798, 0x6eeb9fed, 0xc0830e7c, + 0xe94bba8e, 0x47232b1f, 0xc9c94c25, 0x67a1ddb4, 0x4e696946, + 0xe001f8d7, 0x1df800a2, 0xb3909133, 0x9a5825c1, 0x3430b450, + 0x4bc29689, 0xe5aa0718, 0xcc62b3ea, 0x620a227b, 0x9ff3da0e, + 0x319b4b9f, 0x1853ff6d, 0xb63b6efc, 0x38d109c6, 0x96b99857, + 0xbf712ca5, 0x1119bd34, 0xece04541, 0x4288d4d0, 0x6b406022, + 0xc528f1b3, 0xade5a817, 0x038d3986, 0x2a458d74, 0x842d1ce5, + 0x79d4e490, 0xd7bc7501, 0xfe74c1f3, 0x501c5062, 0xdef63758, + 0x709ea6c9, 0x5956123b, 0xf73e83aa, 0x0ac77bdf, 0xa4afea4e, + 0x8d675ebc, 0x230fcf2d, 0x72831b0e, 0xdceb8a9f, 0xf5233e6d, + 0x5b4baffc, 0xa6b25789, 0x08dac618, 0x211272ea, 0x8f7ae37b, + 0x01908441, 0xaff815d0, 0x8630a122, 0x285830b3, 0xd5a1c8c6, + 0x7bc95957, 0x5201eda5, 0xfc697c34, 0x94a42590, 0x3accb401, + 0x130400f3, 0xbd6c9162, 0x40956917, 0xeefdf886, 0xc7354c74, + 0x695ddde5, 0xe7b7badf, 0x49df2b4e, 0x60179fbc, 0xce7f0e2d, + 0x3386f658, 0x9dee67c9, 0xb426d33b, 0x1a4e42aa, 0x65bc6073, + 0xcbd4f1e2, 0xe21c4510, 0x4c74d481, 0xb18d2cf4, 0x1fe5bd65, + 0x362d0997, 0x98459806, 0x16afff3c, 0xb8c76ead, 0x910fda5f, + 0x3f674bce, 0xc29eb3bb, 0x6cf6222a, 0x453e96d8, 0xeb560749, + 0x839b5eed, 0x2df3cf7c, 0x043b7b8e, 0xaa53ea1f, 0x57aa126a, + 0xf9c283fb, 0xd00a3709, 0x7e62a698, 0xf088c1a2, 0x5ee05033, + 0x7728e4c1, 0xd9407550, 0x24b98d25, 0x8ad11cb4, 0xa319a846, + 0x0d7139d7}, + {0x00000000, 0xb9fbdbe8, 0xa886b191, 0x117d6a79, 0x8a7c6563, + 
0x3387be8b, 0x22fad4f2, 0x9b010f1a, 0xcf89cc87, 0x7672176f, + 0x670f7d16, 0xdef4a6fe, 0x45f5a9e4, 0xfc0e720c, 0xed731875, + 0x5488c39d, 0x44629f4f, 0xfd9944a7, 0xece42ede, 0x551ff536, + 0xce1efa2c, 0x77e521c4, 0x66984bbd, 0xdf639055, 0x8beb53c8, + 0x32108820, 0x236de259, 0x9a9639b1, 0x019736ab, 0xb86ced43, + 0xa911873a, 0x10ea5cd2, 0x88c53e9e, 0x313ee576, 0x20438f0f, + 0x99b854e7, 0x02b95bfd, 0xbb428015, 0xaa3fea6c, 0x13c43184, + 0x474cf219, 0xfeb729f1, 0xefca4388, 0x56319860, 0xcd30977a, + 0x74cb4c92, 0x65b626eb, 0xdc4dfd03, 0xcca7a1d1, 0x755c7a39, + 0x64211040, 0xdddacba8, 0x46dbc4b2, 0xff201f5a, 0xee5d7523, + 0x57a6aecb, 0x032e6d56, 0xbad5b6be, 0xaba8dcc7, 0x1253072f, + 0x89520835, 0x30a9d3dd, 0x21d4b9a4, 0x982f624c, 0xcafb7b7d, + 0x7300a095, 0x627dcaec, 0xdb861104, 0x40871e1e, 0xf97cc5f6, + 0xe801af8f, 0x51fa7467, 0x0572b7fa, 0xbc896c12, 0xadf4066b, + 0x140fdd83, 0x8f0ed299, 0x36f50971, 0x27886308, 0x9e73b8e0, + 0x8e99e432, 0x37623fda, 0x261f55a3, 0x9fe48e4b, 0x04e58151, + 0xbd1e5ab9, 0xac6330c0, 0x1598eb28, 0x411028b5, 0xf8ebf35d, + 0xe9969924, 0x506d42cc, 0xcb6c4dd6, 0x7297963e, 0x63eafc47, + 0xda1127af, 0x423e45e3, 0xfbc59e0b, 0xeab8f472, 0x53432f9a, + 0xc8422080, 0x71b9fb68, 0x60c49111, 0xd93f4af9, 0x8db78964, + 0x344c528c, 0x253138f5, 0x9ccae31d, 0x07cbec07, 0xbe3037ef, + 0xaf4d5d96, 0x16b6867e, 0x065cdaac, 0xbfa70144, 0xaeda6b3d, + 0x1721b0d5, 0x8c20bfcf, 0x35db6427, 0x24a60e5e, 0x9d5dd5b6, + 0xc9d5162b, 0x702ecdc3, 0x6153a7ba, 0xd8a87c52, 0x43a97348, + 0xfa52a8a0, 0xeb2fc2d9, 0x52d41931, 0x4e87f0bb, 0xf77c2b53, + 0xe601412a, 0x5ffa9ac2, 0xc4fb95d8, 0x7d004e30, 0x6c7d2449, + 0xd586ffa1, 0x810e3c3c, 0x38f5e7d4, 0x29888dad, 0x90735645, + 0x0b72595f, 0xb28982b7, 0xa3f4e8ce, 0x1a0f3326, 0x0ae56ff4, + 0xb31eb41c, 0xa263de65, 0x1b98058d, 0x80990a97, 0x3962d17f, + 0x281fbb06, 0x91e460ee, 0xc56ca373, 0x7c97789b, 0x6dea12e2, + 0xd411c90a, 0x4f10c610, 0xf6eb1df8, 0xe7967781, 0x5e6dac69, + 0xc642ce25, 0x7fb915cd, 0x6ec47fb4, 0xd73fa45c, 0x4c3eab46, + 0xf5c570ae, 0xe4b81ad7, 0x5d43c13f, 0x09cb02a2, 0xb030d94a, + 0xa14db333, 0x18b668db, 0x83b767c1, 0x3a4cbc29, 0x2b31d650, + 0x92ca0db8, 0x8220516a, 0x3bdb8a82, 0x2aa6e0fb, 0x935d3b13, + 0x085c3409, 0xb1a7efe1, 0xa0da8598, 0x19215e70, 0x4da99ded, + 0xf4524605, 0xe52f2c7c, 0x5cd4f794, 0xc7d5f88e, 0x7e2e2366, + 0x6f53491f, 0xd6a892f7, 0x847c8bc6, 0x3d87502e, 0x2cfa3a57, + 0x9501e1bf, 0x0e00eea5, 0xb7fb354d, 0xa6865f34, 0x1f7d84dc, + 0x4bf54741, 0xf20e9ca9, 0xe373f6d0, 0x5a882d38, 0xc1892222, + 0x7872f9ca, 0x690f93b3, 0xd0f4485b, 0xc01e1489, 0x79e5cf61, + 0x6898a518, 0xd1637ef0, 0x4a6271ea, 0xf399aa02, 0xe2e4c07b, + 0x5b1f1b93, 0x0f97d80e, 0xb66c03e6, 0xa711699f, 0x1eeab277, + 0x85ebbd6d, 0x3c106685, 0x2d6d0cfc, 0x9496d714, 0x0cb9b558, + 0xb5426eb0, 0xa43f04c9, 0x1dc4df21, 0x86c5d03b, 0x3f3e0bd3, + 0x2e4361aa, 0x97b8ba42, 0xc33079df, 0x7acba237, 0x6bb6c84e, + 0xd24d13a6, 0x494c1cbc, 0xf0b7c754, 0xe1caad2d, 0x583176c5, + 0x48db2a17, 0xf120f1ff, 0xe05d9b86, 0x59a6406e, 0xc2a74f74, + 0x7b5c949c, 0x6a21fee5, 0xd3da250d, 0x8752e690, 0x3ea93d78, + 0x2fd45701, 0x962f8ce9, 0x0d2e83f3, 0xb4d5581b, 0xa5a83262, + 0x1c53e98a}, + {0x00000000, 0x9d0fe176, 0xe16ec4ad, 0x7c6125db, 0x19ac8f1b, + 0x84a36e6d, 0xf8c24bb6, 0x65cdaac0, 0x33591e36, 0xae56ff40, + 0xd237da9b, 0x4f383bed, 0x2af5912d, 0xb7fa705b, 0xcb9b5580, + 0x5694b4f6, 0x66b23c6c, 0xfbbddd1a, 0x87dcf8c1, 0x1ad319b7, + 0x7f1eb377, 0xe2115201, 0x9e7077da, 0x037f96ac, 0x55eb225a, + 0xc8e4c32c, 0xb485e6f7, 0x298a0781, 0x4c47ad41, 0xd1484c37, + 0xad2969ec, 0x3026889a, 0xcd6478d8, 0x506b99ae, 0x2c0abc75, + 
0xb1055d03, 0xd4c8f7c3, 0x49c716b5, 0x35a6336e, 0xa8a9d218, + 0xfe3d66ee, 0x63328798, 0x1f53a243, 0x825c4335, 0xe791e9f5, + 0x7a9e0883, 0x06ff2d58, 0x9bf0cc2e, 0xabd644b4, 0x36d9a5c2, + 0x4ab88019, 0xd7b7616f, 0xb27acbaf, 0x2f752ad9, 0x53140f02, + 0xce1bee74, 0x988f5a82, 0x0580bbf4, 0x79e19e2f, 0xe4ee7f59, + 0x8123d599, 0x1c2c34ef, 0x604d1134, 0xfd42f042, 0x41b9f7f1, + 0xdcb61687, 0xa0d7335c, 0x3dd8d22a, 0x581578ea, 0xc51a999c, + 0xb97bbc47, 0x24745d31, 0x72e0e9c7, 0xefef08b1, 0x938e2d6a, + 0x0e81cc1c, 0x6b4c66dc, 0xf64387aa, 0x8a22a271, 0x172d4307, + 0x270bcb9d, 0xba042aeb, 0xc6650f30, 0x5b6aee46, 0x3ea74486, + 0xa3a8a5f0, 0xdfc9802b, 0x42c6615d, 0x1452d5ab, 0x895d34dd, + 0xf53c1106, 0x6833f070, 0x0dfe5ab0, 0x90f1bbc6, 0xec909e1d, + 0x719f7f6b, 0x8cdd8f29, 0x11d26e5f, 0x6db34b84, 0xf0bcaaf2, + 0x95710032, 0x087ee144, 0x741fc49f, 0xe91025e9, 0xbf84911f, + 0x228b7069, 0x5eea55b2, 0xc3e5b4c4, 0xa6281e04, 0x3b27ff72, + 0x4746daa9, 0xda493bdf, 0xea6fb345, 0x77605233, 0x0b0177e8, + 0x960e969e, 0xf3c33c5e, 0x6eccdd28, 0x12adf8f3, 0x8fa21985, + 0xd936ad73, 0x44394c05, 0x385869de, 0xa55788a8, 0xc09a2268, + 0x5d95c31e, 0x21f4e6c5, 0xbcfb07b3, 0x8373efe2, 0x1e7c0e94, + 0x621d2b4f, 0xff12ca39, 0x9adf60f9, 0x07d0818f, 0x7bb1a454, + 0xe6be4522, 0xb02af1d4, 0x2d2510a2, 0x51443579, 0xcc4bd40f, + 0xa9867ecf, 0x34899fb9, 0x48e8ba62, 0xd5e75b14, 0xe5c1d38e, + 0x78ce32f8, 0x04af1723, 0x99a0f655, 0xfc6d5c95, 0x6162bde3, + 0x1d039838, 0x800c794e, 0xd698cdb8, 0x4b972cce, 0x37f60915, + 0xaaf9e863, 0xcf3442a3, 0x523ba3d5, 0x2e5a860e, 0xb3556778, + 0x4e17973a, 0xd318764c, 0xaf795397, 0x3276b2e1, 0x57bb1821, + 0xcab4f957, 0xb6d5dc8c, 0x2bda3dfa, 0x7d4e890c, 0xe041687a, + 0x9c204da1, 0x012facd7, 0x64e20617, 0xf9ede761, 0x858cc2ba, + 0x188323cc, 0x28a5ab56, 0xb5aa4a20, 0xc9cb6ffb, 0x54c48e8d, + 0x3109244d, 0xac06c53b, 0xd067e0e0, 0x4d680196, 0x1bfcb560, + 0x86f35416, 0xfa9271cd, 0x679d90bb, 0x02503a7b, 0x9f5fdb0d, + 0xe33efed6, 0x7e311fa0, 0xc2ca1813, 0x5fc5f965, 0x23a4dcbe, + 0xbeab3dc8, 0xdb669708, 0x4669767e, 0x3a0853a5, 0xa707b2d3, + 0xf1930625, 0x6c9ce753, 0x10fdc288, 0x8df223fe, 0xe83f893e, + 0x75306848, 0x09514d93, 0x945eace5, 0xa478247f, 0x3977c509, + 0x4516e0d2, 0xd81901a4, 0xbdd4ab64, 0x20db4a12, 0x5cba6fc9, + 0xc1b58ebf, 0x97213a49, 0x0a2edb3f, 0x764ffee4, 0xeb401f92, + 0x8e8db552, 0x13825424, 0x6fe371ff, 0xf2ec9089, 0x0fae60cb, + 0x92a181bd, 0xeec0a466, 0x73cf4510, 0x1602efd0, 0x8b0d0ea6, + 0xf76c2b7d, 0x6a63ca0b, 0x3cf77efd, 0xa1f89f8b, 0xdd99ba50, + 0x40965b26, 0x255bf1e6, 0xb8541090, 0xc435354b, 0x593ad43d, + 0x691c5ca7, 0xf413bdd1, 0x8872980a, 0x157d797c, 0x70b0d3bc, + 0xedbf32ca, 0x91de1711, 0x0cd1f667, 0x5a454291, 0xc74aa3e7, + 0xbb2b863c, 0x2624674a, 0x43e9cd8a, 0xdee62cfc, 0xa2870927, + 0x3f88e851}, + {0x00000000, 0xdd96d985, 0x605cb54b, 0xbdca6cce, 0xc0b96a96, + 0x1d2fb313, 0xa0e5dfdd, 0x7d730658, 0x5a03d36d, 0x87950ae8, + 0x3a5f6626, 0xe7c9bfa3, 0x9abab9fb, 0x472c607e, 0xfae60cb0, + 0x2770d535, 0xb407a6da, 0x69917f5f, 0xd45b1391, 0x09cdca14, + 0x74becc4c, 0xa92815c9, 0x14e27907, 0xc974a082, 0xee0475b7, + 0x3392ac32, 0x8e58c0fc, 0x53ce1979, 0x2ebd1f21, 0xf32bc6a4, + 0x4ee1aa6a, 0x937773ef, 0xb37e4bf5, 0x6ee89270, 0xd322febe, + 0x0eb4273b, 0x73c72163, 0xae51f8e6, 0x139b9428, 0xce0d4dad, + 0xe97d9898, 0x34eb411d, 0x89212dd3, 0x54b7f456, 0x29c4f20e, + 0xf4522b8b, 0x49984745, 0x940e9ec0, 0x0779ed2f, 0xdaef34aa, + 0x67255864, 0xbab381e1, 0xc7c087b9, 0x1a565e3c, 0xa79c32f2, + 0x7a0aeb77, 0x5d7a3e42, 0x80ece7c7, 0x3d268b09, 0xe0b0528c, + 0x9dc354d4, 0x40558d51, 0xfd9fe19f, 0x2009381a, 0xbd8d91ab, + 
0x601b482e, 0xddd124e0, 0x0047fd65, 0x7d34fb3d, 0xa0a222b8, + 0x1d684e76, 0xc0fe97f3, 0xe78e42c6, 0x3a189b43, 0x87d2f78d, + 0x5a442e08, 0x27372850, 0xfaa1f1d5, 0x476b9d1b, 0x9afd449e, + 0x098a3771, 0xd41ceef4, 0x69d6823a, 0xb4405bbf, 0xc9335de7, + 0x14a58462, 0xa96fe8ac, 0x74f93129, 0x5389e41c, 0x8e1f3d99, + 0x33d55157, 0xee4388d2, 0x93308e8a, 0x4ea6570f, 0xf36c3bc1, + 0x2efae244, 0x0ef3da5e, 0xd36503db, 0x6eaf6f15, 0xb339b690, + 0xce4ab0c8, 0x13dc694d, 0xae160583, 0x7380dc06, 0x54f00933, + 0x8966d0b6, 0x34acbc78, 0xe93a65fd, 0x944963a5, 0x49dfba20, + 0xf415d6ee, 0x29830f6b, 0xbaf47c84, 0x6762a501, 0xdaa8c9cf, + 0x073e104a, 0x7a4d1612, 0xa7dbcf97, 0x1a11a359, 0xc7877adc, + 0xe0f7afe9, 0x3d61766c, 0x80ab1aa2, 0x5d3dc327, 0x204ec57f, + 0xfdd81cfa, 0x40127034, 0x9d84a9b1, 0xa06a2517, 0x7dfcfc92, + 0xc036905c, 0x1da049d9, 0x60d34f81, 0xbd459604, 0x008ffaca, + 0xdd19234f, 0xfa69f67a, 0x27ff2fff, 0x9a354331, 0x47a39ab4, + 0x3ad09cec, 0xe7464569, 0x5a8c29a7, 0x871af022, 0x146d83cd, + 0xc9fb5a48, 0x74313686, 0xa9a7ef03, 0xd4d4e95b, 0x094230de, + 0xb4885c10, 0x691e8595, 0x4e6e50a0, 0x93f88925, 0x2e32e5eb, + 0xf3a43c6e, 0x8ed73a36, 0x5341e3b3, 0xee8b8f7d, 0x331d56f8, + 0x13146ee2, 0xce82b767, 0x7348dba9, 0xaede022c, 0xd3ad0474, + 0x0e3bddf1, 0xb3f1b13f, 0x6e6768ba, 0x4917bd8f, 0x9481640a, + 0x294b08c4, 0xf4ddd141, 0x89aed719, 0x54380e9c, 0xe9f26252, + 0x3464bbd7, 0xa713c838, 0x7a8511bd, 0xc74f7d73, 0x1ad9a4f6, + 0x67aaa2ae, 0xba3c7b2b, 0x07f617e5, 0xda60ce60, 0xfd101b55, + 0x2086c2d0, 0x9d4cae1e, 0x40da779b, 0x3da971c3, 0xe03fa846, + 0x5df5c488, 0x80631d0d, 0x1de7b4bc, 0xc0716d39, 0x7dbb01f7, + 0xa02dd872, 0xdd5ede2a, 0x00c807af, 0xbd026b61, 0x6094b2e4, + 0x47e467d1, 0x9a72be54, 0x27b8d29a, 0xfa2e0b1f, 0x875d0d47, + 0x5acbd4c2, 0xe701b80c, 0x3a976189, 0xa9e01266, 0x7476cbe3, + 0xc9bca72d, 0x142a7ea8, 0x695978f0, 0xb4cfa175, 0x0905cdbb, + 0xd493143e, 0xf3e3c10b, 0x2e75188e, 0x93bf7440, 0x4e29adc5, + 0x335aab9d, 0xeecc7218, 0x53061ed6, 0x8e90c753, 0xae99ff49, + 0x730f26cc, 0xcec54a02, 0x13539387, 0x6e2095df, 0xb3b64c5a, + 0x0e7c2094, 0xd3eaf911, 0xf49a2c24, 0x290cf5a1, 0x94c6996f, + 0x495040ea, 0x342346b2, 0xe9b59f37, 0x547ff3f9, 0x89e92a7c, + 0x1a9e5993, 0xc7088016, 0x7ac2ecd8, 0xa754355d, 0xda273305, + 0x07b1ea80, 0xba7b864e, 0x67ed5fcb, 0x409d8afe, 0x9d0b537b, + 0x20c13fb5, 0xfd57e630, 0x8024e068, 0x5db239ed, 0xe0785523, + 0x3dee8ca6}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x85d996dd, 0x4bb55c60, 0xce6ccabd, 0x966ab9c0, + 0x13b32f1d, 0xdddfe5a0, 0x5806737d, 0x6dd3035a, 0xe80a9587, + 0x26665f3a, 0xa3bfc9e7, 0xfbb9ba9a, 0x7e602c47, 0xb00ce6fa, + 0x35d57027, 0xdaa607b4, 0x5f7f9169, 0x91135bd4, 0x14cacd09, + 0x4cccbe74, 0xc91528a9, 0x0779e214, 0x82a074c9, 0xb77504ee, + 0x32ac9233, 0xfcc0588e, 0x7919ce53, 0x211fbd2e, 0xa4c62bf3, + 0x6aaae14e, 0xef737793, 0xf54b7eb3, 0x7092e86e, 0xbefe22d3, + 0x3b27b40e, 0x6321c773, 0xe6f851ae, 0x28949b13, 0xad4d0dce, + 0x98987de9, 0x1d41eb34, 0xd32d2189, 0x56f4b754, 0x0ef2c429, + 0x8b2b52f4, 0x45479849, 0xc09e0e94, 0x2fed7907, 0xaa34efda, + 0x64582567, 0xe181b3ba, 0xb987c0c7, 0x3c5e561a, 0xf2329ca7, + 0x77eb0a7a, 0x423e7a5d, 0xc7e7ec80, 0x098b263d, 0x8c52b0e0, + 0xd454c39d, 0x518d5540, 0x9fe19ffd, 0x1a380920, 0xab918dbd, + 0x2e481b60, 0xe024d1dd, 0x65fd4700, 0x3dfb347d, 0xb822a2a0, + 0x764e681d, 0xf397fec0, 0xc6428ee7, 0x439b183a, 0x8df7d287, + 0x082e445a, 0x50283727, 0xd5f1a1fa, 0x1b9d6b47, 0x9e44fd9a, + 0x71378a09, 0xf4ee1cd4, 0x3a82d669, 0xbf5b40b4, 0xe75d33c9, + 0x6284a514, 0xace86fa9, 0x2931f974, 0x1ce48953, 0x993d1f8e, + 
0x5751d533, 0xd28843ee, 0x8a8e3093, 0x0f57a64e, 0xc13b6cf3, + 0x44e2fa2e, 0x5edaf30e, 0xdb0365d3, 0x156faf6e, 0x90b639b3, + 0xc8b04ace, 0x4d69dc13, 0x830516ae, 0x06dc8073, 0x3309f054, + 0xb6d06689, 0x78bcac34, 0xfd653ae9, 0xa5634994, 0x20badf49, + 0xeed615f4, 0x6b0f8329, 0x847cf4ba, 0x01a56267, 0xcfc9a8da, + 0x4a103e07, 0x12164d7a, 0x97cfdba7, 0x59a3111a, 0xdc7a87c7, + 0xe9aff7e0, 0x6c76613d, 0xa21aab80, 0x27c33d5d, 0x7fc54e20, + 0xfa1cd8fd, 0x34701240, 0xb1a9849d, 0x17256aa0, 0x92fcfc7d, + 0x5c9036c0, 0xd949a01d, 0x814fd360, 0x049645bd, 0xcafa8f00, + 0x4f2319dd, 0x7af669fa, 0xff2fff27, 0x3143359a, 0xb49aa347, + 0xec9cd03a, 0x694546e7, 0xa7298c5a, 0x22f01a87, 0xcd836d14, + 0x485afbc9, 0x86363174, 0x03efa7a9, 0x5be9d4d4, 0xde304209, + 0x105c88b4, 0x95851e69, 0xa0506e4e, 0x2589f893, 0xebe5322e, + 0x6e3ca4f3, 0x363ad78e, 0xb3e34153, 0x7d8f8bee, 0xf8561d33, + 0xe26e1413, 0x67b782ce, 0xa9db4873, 0x2c02deae, 0x7404add3, + 0xf1dd3b0e, 0x3fb1f1b3, 0xba68676e, 0x8fbd1749, 0x0a648194, + 0xc4084b29, 0x41d1ddf4, 0x19d7ae89, 0x9c0e3854, 0x5262f2e9, + 0xd7bb6434, 0x38c813a7, 0xbd11857a, 0x737d4fc7, 0xf6a4d91a, + 0xaea2aa67, 0x2b7b3cba, 0xe517f607, 0x60ce60da, 0x551b10fd, + 0xd0c28620, 0x1eae4c9d, 0x9b77da40, 0xc371a93d, 0x46a83fe0, + 0x88c4f55d, 0x0d1d6380, 0xbcb4e71d, 0x396d71c0, 0xf701bb7d, + 0x72d82da0, 0x2ade5edd, 0xaf07c800, 0x616b02bd, 0xe4b29460, + 0xd167e447, 0x54be729a, 0x9ad2b827, 0x1f0b2efa, 0x470d5d87, + 0xc2d4cb5a, 0x0cb801e7, 0x8961973a, 0x6612e0a9, 0xe3cb7674, + 0x2da7bcc9, 0xa87e2a14, 0xf0785969, 0x75a1cfb4, 0xbbcd0509, + 0x3e1493d4, 0x0bc1e3f3, 0x8e18752e, 0x4074bf93, 0xc5ad294e, + 0x9dab5a33, 0x1872ccee, 0xd61e0653, 0x53c7908e, 0x49ff99ae, + 0xcc260f73, 0x024ac5ce, 0x87935313, 0xdf95206e, 0x5a4cb6b3, + 0x94207c0e, 0x11f9ead3, 0x242c9af4, 0xa1f50c29, 0x6f99c694, + 0xea405049, 0xb2462334, 0x379fb5e9, 0xf9f37f54, 0x7c2ae989, + 0x93599e1a, 0x168008c7, 0xd8ecc27a, 0x5d3554a7, 0x053327da, + 0x80eab107, 0x4e867bba, 0xcb5fed67, 0xfe8a9d40, 0x7b530b9d, + 0xb53fc120, 0x30e657fd, 0x68e02480, 0xed39b25d, 0x235578e0, + 0xa68cee3d}, + {0x00000000, 0x76e10f9d, 0xadc46ee1, 0xdb25617c, 0x1b8fac19, + 0x6d6ea384, 0xb64bc2f8, 0xc0aacd65, 0x361e5933, 0x40ff56ae, + 0x9bda37d2, 0xed3b384f, 0x2d91f52a, 0x5b70fab7, 0x80559bcb, + 0xf6b49456, 0x6c3cb266, 0x1addbdfb, 0xc1f8dc87, 0xb719d31a, + 0x77b31e7f, 0x015211e2, 0xda77709e, 0xac967f03, 0x5a22eb55, + 0x2cc3e4c8, 0xf7e685b4, 0x81078a29, 0x41ad474c, 0x374c48d1, + 0xec6929ad, 0x9a882630, 0xd87864cd, 0xae996b50, 0x75bc0a2c, + 0x035d05b1, 0xc3f7c8d4, 0xb516c749, 0x6e33a635, 0x18d2a9a8, + 0xee663dfe, 0x98873263, 0x43a2531f, 0x35435c82, 0xf5e991e7, + 0x83089e7a, 0x582dff06, 0x2eccf09b, 0xb444d6ab, 0xc2a5d936, + 0x1980b84a, 0x6f61b7d7, 0xafcb7ab2, 0xd92a752f, 0x020f1453, + 0x74ee1bce, 0x825a8f98, 0xf4bb8005, 0x2f9ee179, 0x597feee4, + 0x99d52381, 0xef342c1c, 0x34114d60, 0x42f042fd, 0xf1f7b941, + 0x8716b6dc, 0x5c33d7a0, 0x2ad2d83d, 0xea781558, 0x9c991ac5, + 0x47bc7bb9, 0x315d7424, 0xc7e9e072, 0xb108efef, 0x6a2d8e93, + 0x1ccc810e, 0xdc664c6b, 0xaa8743f6, 0x71a2228a, 0x07432d17, + 0x9dcb0b27, 0xeb2a04ba, 0x300f65c6, 0x46ee6a5b, 0x8644a73e, + 0xf0a5a8a3, 0x2b80c9df, 0x5d61c642, 0xabd55214, 0xdd345d89, + 0x06113cf5, 0x70f03368, 0xb05afe0d, 0xc6bbf190, 0x1d9e90ec, + 0x6b7f9f71, 0x298fdd8c, 0x5f6ed211, 0x844bb36d, 0xf2aabcf0, + 0x32007195, 0x44e17e08, 0x9fc41f74, 0xe92510e9, 0x1f9184bf, + 0x69708b22, 0xb255ea5e, 0xc4b4e5c3, 0x041e28a6, 0x72ff273b, + 0xa9da4647, 0xdf3b49da, 0x45b36fea, 0x33526077, 0xe877010b, + 0x9e960e96, 0x5e3cc3f3, 0x28ddcc6e, 0xf3f8ad12, 0x8519a28f, + 
0x73ad36d9, 0x054c3944, 0xde695838, 0xa88857a5, 0x68229ac0, + 0x1ec3955d, 0xc5e6f421, 0xb307fbbc, 0xe2ef7383, 0x940e7c1e, + 0x4f2b1d62, 0x39ca12ff, 0xf960df9a, 0x8f81d007, 0x54a4b17b, + 0x2245bee6, 0xd4f12ab0, 0xa210252d, 0x79354451, 0x0fd44bcc, + 0xcf7e86a9, 0xb99f8934, 0x62bae848, 0x145be7d5, 0x8ed3c1e5, + 0xf832ce78, 0x2317af04, 0x55f6a099, 0x955c6dfc, 0xe3bd6261, + 0x3898031d, 0x4e790c80, 0xb8cd98d6, 0xce2c974b, 0x1509f637, + 0x63e8f9aa, 0xa34234cf, 0xd5a33b52, 0x0e865a2e, 0x786755b3, + 0x3a97174e, 0x4c7618d3, 0x975379af, 0xe1b27632, 0x2118bb57, + 0x57f9b4ca, 0x8cdcd5b6, 0xfa3dda2b, 0x0c894e7d, 0x7a6841e0, + 0xa14d209c, 0xd7ac2f01, 0x1706e264, 0x61e7edf9, 0xbac28c85, + 0xcc238318, 0x56aba528, 0x204aaab5, 0xfb6fcbc9, 0x8d8ec454, + 0x4d240931, 0x3bc506ac, 0xe0e067d0, 0x9601684d, 0x60b5fc1b, + 0x1654f386, 0xcd7192fa, 0xbb909d67, 0x7b3a5002, 0x0ddb5f9f, + 0xd6fe3ee3, 0xa01f317e, 0x1318cac2, 0x65f9c55f, 0xbedca423, + 0xc83dabbe, 0x089766db, 0x7e766946, 0xa553083a, 0xd3b207a7, + 0x250693f1, 0x53e79c6c, 0x88c2fd10, 0xfe23f28d, 0x3e893fe8, + 0x48683075, 0x934d5109, 0xe5ac5e94, 0x7f2478a4, 0x09c57739, + 0xd2e01645, 0xa40119d8, 0x64abd4bd, 0x124adb20, 0xc96fba5c, + 0xbf8eb5c1, 0x493a2197, 0x3fdb2e0a, 0xe4fe4f76, 0x921f40eb, + 0x52b58d8e, 0x24548213, 0xff71e36f, 0x8990ecf2, 0xcb60ae0f, + 0xbd81a192, 0x66a4c0ee, 0x1045cf73, 0xd0ef0216, 0xa60e0d8b, + 0x7d2b6cf7, 0x0bca636a, 0xfd7ef73c, 0x8b9ff8a1, 0x50ba99dd, + 0x265b9640, 0xe6f15b25, 0x901054b8, 0x4b3535c4, 0x3dd43a59, + 0xa75c1c69, 0xd1bd13f4, 0x0a987288, 0x7c797d15, 0xbcd3b070, + 0xca32bfed, 0x1117de91, 0x67f6d10c, 0x9142455a, 0xe7a34ac7, + 0x3c862bbb, 0x4a672426, 0x8acde943, 0xfc2ce6de, 0x270987a2, + 0x51e8883f}, + {0x00000000, 0xe8dbfbb9, 0x91b186a8, 0x796a7d11, 0x63657c8a, + 0x8bbe8733, 0xf2d4fa22, 0x1a0f019b, 0x87cc89cf, 0x6f177276, + 0x167d0f67, 0xfea6f4de, 0xe4a9f545, 0x0c720efc, 0x751873ed, + 0x9dc38854, 0x4f9f6244, 0xa74499fd, 0xde2ee4ec, 0x36f51f55, + 0x2cfa1ece, 0xc421e577, 0xbd4b9866, 0x559063df, 0xc853eb8b, + 0x20881032, 0x59e26d23, 0xb139969a, 0xab369701, 0x43ed6cb8, + 0x3a8711a9, 0xd25cea10, 0x9e3ec588, 0x76e53e31, 0x0f8f4320, + 0xe754b899, 0xfd5bb902, 0x158042bb, 0x6cea3faa, 0x8431c413, + 0x19f24c47, 0xf129b7fe, 0x8843caef, 0x60983156, 0x7a9730cd, + 0x924ccb74, 0xeb26b665, 0x03fd4ddc, 0xd1a1a7cc, 0x397a5c75, + 0x40102164, 0xa8cbdadd, 0xb2c4db46, 0x5a1f20ff, 0x23755dee, + 0xcbaea657, 0x566d2e03, 0xbeb6d5ba, 0xc7dca8ab, 0x2f075312, + 0x35085289, 0xddd3a930, 0xa4b9d421, 0x4c622f98, 0x7d7bfbca, + 0x95a00073, 0xecca7d62, 0x041186db, 0x1e1e8740, 0xf6c57cf9, + 0x8faf01e8, 0x6774fa51, 0xfab77205, 0x126c89bc, 0x6b06f4ad, + 0x83dd0f14, 0x99d20e8f, 0x7109f536, 0x08638827, 0xe0b8739e, + 0x32e4998e, 0xda3f6237, 0xa3551f26, 0x4b8ee49f, 0x5181e504, + 0xb95a1ebd, 0xc03063ac, 0x28eb9815, 0xb5281041, 0x5df3ebf8, + 0x249996e9, 0xcc426d50, 0xd64d6ccb, 0x3e969772, 0x47fcea63, + 0xaf2711da, 0xe3453e42, 0x0b9ec5fb, 0x72f4b8ea, 0x9a2f4353, + 0x802042c8, 0x68fbb971, 0x1191c460, 0xf94a3fd9, 0x6489b78d, + 0x8c524c34, 0xf5383125, 0x1de3ca9c, 0x07eccb07, 0xef3730be, + 0x965d4daf, 0x7e86b616, 0xacda5c06, 0x4401a7bf, 0x3d6bdaae, + 0xd5b02117, 0xcfbf208c, 0x2764db35, 0x5e0ea624, 0xb6d55d9d, + 0x2b16d5c9, 0xc3cd2e70, 0xbaa75361, 0x527ca8d8, 0x4873a943, + 0xa0a852fa, 0xd9c22feb, 0x3119d452, 0xbbf0874e, 0x532b7cf7, + 0x2a4101e6, 0xc29afa5f, 0xd895fbc4, 0x304e007d, 0x49247d6c, + 0xa1ff86d5, 0x3c3c0e81, 0xd4e7f538, 0xad8d8829, 0x45567390, + 0x5f59720b, 0xb78289b2, 0xcee8f4a3, 0x26330f1a, 0xf46fe50a, + 0x1cb41eb3, 0x65de63a2, 0x8d05981b, 0x970a9980, 0x7fd16239, + 
0x06bb1f28, 0xee60e491, 0x73a36cc5, 0x9b78977c, 0xe212ea6d, + 0x0ac911d4, 0x10c6104f, 0xf81debf6, 0x817796e7, 0x69ac6d5e, + 0x25ce42c6, 0xcd15b97f, 0xb47fc46e, 0x5ca43fd7, 0x46ab3e4c, + 0xae70c5f5, 0xd71ab8e4, 0x3fc1435d, 0xa202cb09, 0x4ad930b0, + 0x33b34da1, 0xdb68b618, 0xc167b783, 0x29bc4c3a, 0x50d6312b, + 0xb80dca92, 0x6a512082, 0x828adb3b, 0xfbe0a62a, 0x133b5d93, + 0x09345c08, 0xe1efa7b1, 0x9885daa0, 0x705e2119, 0xed9da94d, + 0x054652f4, 0x7c2c2fe5, 0x94f7d45c, 0x8ef8d5c7, 0x66232e7e, + 0x1f49536f, 0xf792a8d6, 0xc68b7c84, 0x2e50873d, 0x573afa2c, + 0xbfe10195, 0xa5ee000e, 0x4d35fbb7, 0x345f86a6, 0xdc847d1f, + 0x4147f54b, 0xa99c0ef2, 0xd0f673e3, 0x382d885a, 0x222289c1, + 0xcaf97278, 0xb3930f69, 0x5b48f4d0, 0x89141ec0, 0x61cfe579, + 0x18a59868, 0xf07e63d1, 0xea71624a, 0x02aa99f3, 0x7bc0e4e2, + 0x931b1f5b, 0x0ed8970f, 0xe6036cb6, 0x9f6911a7, 0x77b2ea1e, + 0x6dbdeb85, 0x8566103c, 0xfc0c6d2d, 0x14d79694, 0x58b5b90c, + 0xb06e42b5, 0xc9043fa4, 0x21dfc41d, 0x3bd0c586, 0xd30b3e3f, + 0xaa61432e, 0x42bab897, 0xdf7930c3, 0x37a2cb7a, 0x4ec8b66b, + 0xa6134dd2, 0xbc1c4c49, 0x54c7b7f0, 0x2dadcae1, 0xc5763158, + 0x172adb48, 0xfff120f1, 0x869b5de0, 0x6e40a659, 0x744fa7c2, + 0x9c945c7b, 0xe5fe216a, 0x0d25dad3, 0x90e65287, 0x783da93e, + 0x0157d42f, 0xe98c2f96, 0xf3832e0d, 0x1b58d5b4, 0x6232a8a5, + 0x8ae9531c}, + {0x00000000, 0x919168ae, 0x6325a087, 0xf2b4c829, 0x874c31d4, + 0x16dd597a, 0xe4699153, 0x75f8f9fd, 0x4f9f1373, 0xde0e7bdd, + 0x2cbab3f4, 0xbd2bdb5a, 0xc8d322a7, 0x59424a09, 0xabf68220, + 0x3a67ea8e, 0x9e3e27e6, 0x0faf4f48, 0xfd1b8761, 0x6c8aefcf, + 0x19721632, 0x88e37e9c, 0x7a57b6b5, 0xebc6de1b, 0xd1a13495, + 0x40305c3b, 0xb2849412, 0x2315fcbc, 0x56ed0541, 0xc77c6def, + 0x35c8a5c6, 0xa459cd68, 0x7d7b3f17, 0xecea57b9, 0x1e5e9f90, + 0x8fcff73e, 0xfa370ec3, 0x6ba6666d, 0x9912ae44, 0x0883c6ea, + 0x32e42c64, 0xa37544ca, 0x51c18ce3, 0xc050e44d, 0xb5a81db0, + 0x2439751e, 0xd68dbd37, 0x471cd599, 0xe34518f1, 0x72d4705f, + 0x8060b876, 0x11f1d0d8, 0x64092925, 0xf598418b, 0x072c89a2, + 0x96bde10c, 0xacda0b82, 0x3d4b632c, 0xcfffab05, 0x5e6ec3ab, + 0x2b963a56, 0xba0752f8, 0x48b39ad1, 0xd922f27f, 0xfaf67e2e, + 0x6b671680, 0x99d3dea9, 0x0842b607, 0x7dba4ffa, 0xec2b2754, + 0x1e9fef7d, 0x8f0e87d3, 0xb5696d5d, 0x24f805f3, 0xd64ccdda, + 0x47dda574, 0x32255c89, 0xa3b43427, 0x5100fc0e, 0xc09194a0, + 0x64c859c8, 0xf5593166, 0x07edf94f, 0x967c91e1, 0xe384681c, + 0x721500b2, 0x80a1c89b, 0x1130a035, 0x2b574abb, 0xbac62215, + 0x4872ea3c, 0xd9e38292, 0xac1b7b6f, 0x3d8a13c1, 0xcf3edbe8, + 0x5eafb346, 0x878d4139, 0x161c2997, 0xe4a8e1be, 0x75398910, + 0x00c170ed, 0x91501843, 0x63e4d06a, 0xf275b8c4, 0xc812524a, + 0x59833ae4, 0xab37f2cd, 0x3aa69a63, 0x4f5e639e, 0xdecf0b30, + 0x2c7bc319, 0xbdeaabb7, 0x19b366df, 0x88220e71, 0x7a96c658, + 0xeb07aef6, 0x9eff570b, 0x0f6e3fa5, 0xfddaf78c, 0x6c4b9f22, + 0x562c75ac, 0xc7bd1d02, 0x3509d52b, 0xa498bd85, 0xd1604478, + 0x40f12cd6, 0xb245e4ff, 0x23d48c51, 0xf4edfd5c, 0x657c95f2, + 0x97c85ddb, 0x06593575, 0x73a1cc88, 0xe230a426, 0x10846c0f, + 0x811504a1, 0xbb72ee2f, 0x2ae38681, 0xd8574ea8, 0x49c62606, + 0x3c3edffb, 0xadafb755, 0x5f1b7f7c, 0xce8a17d2, 0x6ad3daba, + 0xfb42b214, 0x09f67a3d, 0x98671293, 0xed9feb6e, 0x7c0e83c0, + 0x8eba4be9, 0x1f2b2347, 0x254cc9c9, 0xb4dda167, 0x4669694e, + 0xd7f801e0, 0xa200f81d, 0x339190b3, 0xc125589a, 0x50b43034, + 0x8996c24b, 0x1807aae5, 0xeab362cc, 0x7b220a62, 0x0edaf39f, + 0x9f4b9b31, 0x6dff5318, 0xfc6e3bb6, 0xc609d138, 0x5798b996, + 0xa52c71bf, 0x34bd1911, 0x4145e0ec, 0xd0d48842, 0x2260406b, + 0xb3f128c5, 0x17a8e5ad, 0x86398d03, 0x748d452a, 0xe51c2d84, + 
0x90e4d479, 0x0175bcd7, 0xf3c174fe, 0x62501c50, 0x5837f6de, + 0xc9a69e70, 0x3b125659, 0xaa833ef7, 0xdf7bc70a, 0x4eeaafa4, + 0xbc5e678d, 0x2dcf0f23, 0x0e1b8372, 0x9f8aebdc, 0x6d3e23f5, + 0xfcaf4b5b, 0x8957b2a6, 0x18c6da08, 0xea721221, 0x7be37a8f, + 0x41849001, 0xd015f8af, 0x22a13086, 0xb3305828, 0xc6c8a1d5, + 0x5759c97b, 0xa5ed0152, 0x347c69fc, 0x9025a494, 0x01b4cc3a, + 0xf3000413, 0x62916cbd, 0x17699540, 0x86f8fdee, 0x744c35c7, + 0xe5dd5d69, 0xdfbab7e7, 0x4e2bdf49, 0xbc9f1760, 0x2d0e7fce, + 0x58f68633, 0xc967ee9d, 0x3bd326b4, 0xaa424e1a, 0x7360bc65, + 0xe2f1d4cb, 0x10451ce2, 0x81d4744c, 0xf42c8db1, 0x65bde51f, + 0x97092d36, 0x06984598, 0x3cffaf16, 0xad6ec7b8, 0x5fda0f91, + 0xce4b673f, 0xbbb39ec2, 0x2a22f66c, 0xd8963e45, 0x490756eb, + 0xed5e9b83, 0x7ccff32d, 0x8e7b3b04, 0x1fea53aa, 0x6a12aa57, + 0xfb83c2f9, 0x09370ad0, 0x98a6627e, 0xa2c188f0, 0x3350e05e, + 0xc1e42877, 0x507540d9, 0x258db924, 0xb41cd18a, 0x46a819a3, + 0xd739710d}}; + +#endif + +#endif + +#if N == 5 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0xaf449247, 0x85f822cf, 0x2abcb088, 0xd08143df, + 0x7fc5d198, 0x55796110, 0xfa3df357, 0x7a7381ff, 0xd53713b8, + 0xff8ba330, 0x50cf3177, 0xaaf2c220, 0x05b65067, 0x2f0ae0ef, + 0x804e72a8, 0xf4e703fe, 0x5ba391b9, 0x711f2131, 0xde5bb376, + 0x24664021, 0x8b22d266, 0xa19e62ee, 0x0edaf0a9, 0x8e948201, + 0x21d01046, 0x0b6ca0ce, 0xa4283289, 0x5e15c1de, 0xf1515399, + 0xdbede311, 0x74a97156, 0x32bf01bd, 0x9dfb93fa, 0xb7472372, + 0x1803b135, 0xe23e4262, 0x4d7ad025, 0x67c660ad, 0xc882f2ea, + 0x48cc8042, 0xe7881205, 0xcd34a28d, 0x627030ca, 0x984dc39d, + 0x370951da, 0x1db5e152, 0xb2f17315, 0xc6580243, 0x691c9004, + 0x43a0208c, 0xece4b2cb, 0x16d9419c, 0xb99dd3db, 0x93216353, + 0x3c65f114, 0xbc2b83bc, 0x136f11fb, 0x39d3a173, 0x96973334, + 0x6caac063, 0xc3ee5224, 0xe952e2ac, 0x461670eb, 0x657e037a, + 0xca3a913d, 0xe08621b5, 0x4fc2b3f2, 0xb5ff40a5, 0x1abbd2e2, + 0x3007626a, 0x9f43f02d, 0x1f0d8285, 0xb04910c2, 0x9af5a04a, + 0x35b1320d, 0xcf8cc15a, 0x60c8531d, 0x4a74e395, 0xe53071d2, + 0x91990084, 0x3edd92c3, 0x1461224b, 0xbb25b00c, 0x4118435b, + 0xee5cd11c, 0xc4e06194, 0x6ba4f3d3, 0xebea817b, 0x44ae133c, + 0x6e12a3b4, 0xc15631f3, 0x3b6bc2a4, 0x942f50e3, 0xbe93e06b, + 0x11d7722c, 0x57c102c7, 0xf8859080, 0xd2392008, 0x7d7db24f, + 0x87404118, 0x2804d35f, 0x02b863d7, 0xadfcf190, 0x2db28338, + 0x82f6117f, 0xa84aa1f7, 0x070e33b0, 0xfd33c0e7, 0x527752a0, + 0x78cbe228, 0xd78f706f, 0xa3260139, 0x0c62937e, 0x26de23f6, + 0x899ab1b1, 0x73a742e6, 0xdce3d0a1, 0xf65f6029, 0x591bf26e, + 0xd95580c6, 0x76111281, 0x5cada209, 0xf3e9304e, 0x09d4c319, + 0xa690515e, 0x8c2ce1d6, 0x23687391, 0xcafc06f4, 0x65b894b3, + 0x4f04243b, 0xe040b67c, 0x1a7d452b, 0xb539d76c, 0x9f8567e4, + 0x30c1f5a3, 0xb08f870b, 0x1fcb154c, 0x3577a5c4, 0x9a333783, + 0x600ec4d4, 0xcf4a5693, 0xe5f6e61b, 0x4ab2745c, 0x3e1b050a, + 0x915f974d, 0xbbe327c5, 0x14a7b582, 0xee9a46d5, 0x41ded492, + 0x6b62641a, 0xc426f65d, 0x446884f5, 0xeb2c16b2, 0xc190a63a, + 0x6ed4347d, 0x94e9c72a, 0x3bad556d, 0x1111e5e5, 0xbe5577a2, + 0xf8430749, 0x5707950e, 0x7dbb2586, 0xd2ffb7c1, 0x28c24496, + 0x8786d6d1, 0xad3a6659, 0x027ef41e, 0x823086b6, 0x2d7414f1, + 0x07c8a479, 0xa88c363e, 0x52b1c569, 0xfdf5572e, 0xd749e7a6, + 0x780d75e1, 0x0ca404b7, 0xa3e096f0, 0x895c2678, 0x2618b43f, + 0xdc254768, 0x7361d52f, 0x59dd65a7, 0xf699f7e0, 0x76d78548, + 0xd993170f, 0xf32fa787, 0x5c6b35c0, 0xa656c697, 0x091254d0, + 0x23aee458, 0x8cea761f, 0xaf82058e, 0x00c697c9, 0x2a7a2741, + 0x853eb506, 0x7f034651, 0xd047d416, 0xfafb649e, 0x55bff6d9, + 0xd5f18471, 0x7ab51636, 
0x5009a6be, 0xff4d34f9, 0x0570c7ae, + 0xaa3455e9, 0x8088e561, 0x2fcc7726, 0x5b650670, 0xf4219437, + 0xde9d24bf, 0x71d9b6f8, 0x8be445af, 0x24a0d7e8, 0x0e1c6760, + 0xa158f527, 0x2116878f, 0x8e5215c8, 0xa4eea540, 0x0baa3707, + 0xf197c450, 0x5ed35617, 0x746fe69f, 0xdb2b74d8, 0x9d3d0433, + 0x32799674, 0x18c526fc, 0xb781b4bb, 0x4dbc47ec, 0xe2f8d5ab, + 0xc8446523, 0x6700f764, 0xe74e85cc, 0x480a178b, 0x62b6a703, + 0xcdf23544, 0x37cfc613, 0x988b5454, 0xb237e4dc, 0x1d73769b, + 0x69da07cd, 0xc69e958a, 0xec222502, 0x4366b745, 0xb95b4412, + 0x161fd655, 0x3ca366dd, 0x93e7f49a, 0x13a98632, 0xbced1475, + 0x9651a4fd, 0x391536ba, 0xc328c5ed, 0x6c6c57aa, 0x46d0e722, + 0xe9947565}, + {0x00000000, 0x4e890ba9, 0x9d121752, 0xd39b1cfb, 0xe15528e5, + 0xafdc234c, 0x7c473fb7, 0x32ce341e, 0x19db578b, 0x57525c22, + 0x84c940d9, 0xca404b70, 0xf88e7f6e, 0xb60774c7, 0x659c683c, + 0x2b156395, 0x33b6af16, 0x7d3fa4bf, 0xaea4b844, 0xe02db3ed, + 0xd2e387f3, 0x9c6a8c5a, 0x4ff190a1, 0x01789b08, 0x2a6df89d, + 0x64e4f334, 0xb77fefcf, 0xf9f6e466, 0xcb38d078, 0x85b1dbd1, + 0x562ac72a, 0x18a3cc83, 0x676d5e2c, 0x29e45585, 0xfa7f497e, + 0xb4f642d7, 0x863876c9, 0xc8b17d60, 0x1b2a619b, 0x55a36a32, + 0x7eb609a7, 0x303f020e, 0xe3a41ef5, 0xad2d155c, 0x9fe32142, + 0xd16a2aeb, 0x02f13610, 0x4c783db9, 0x54dbf13a, 0x1a52fa93, + 0xc9c9e668, 0x8740edc1, 0xb58ed9df, 0xfb07d276, 0x289cce8d, + 0x6615c524, 0x4d00a6b1, 0x0389ad18, 0xd012b1e3, 0x9e9bba4a, + 0xac558e54, 0xe2dc85fd, 0x31479906, 0x7fce92af, 0xcedabc58, + 0x8053b7f1, 0x53c8ab0a, 0x1d41a0a3, 0x2f8f94bd, 0x61069f14, + 0xb29d83ef, 0xfc148846, 0xd701ebd3, 0x9988e07a, 0x4a13fc81, + 0x049af728, 0x3654c336, 0x78ddc89f, 0xab46d464, 0xe5cfdfcd, + 0xfd6c134e, 0xb3e518e7, 0x607e041c, 0x2ef70fb5, 0x1c393bab, + 0x52b03002, 0x812b2cf9, 0xcfa22750, 0xe4b744c5, 0xaa3e4f6c, + 0x79a55397, 0x372c583e, 0x05e26c20, 0x4b6b6789, 0x98f07b72, + 0xd67970db, 0xa9b7e274, 0xe73ee9dd, 0x34a5f526, 0x7a2cfe8f, + 0x48e2ca91, 0x066bc138, 0xd5f0ddc3, 0x9b79d66a, 0xb06cb5ff, + 0xfee5be56, 0x2d7ea2ad, 0x63f7a904, 0x51399d1a, 0x1fb096b3, + 0xcc2b8a48, 0x82a281e1, 0x9a014d62, 0xd48846cb, 0x07135a30, + 0x499a5199, 0x7b546587, 0x35dd6e2e, 0xe64672d5, 0xa8cf797c, + 0x83da1ae9, 0xcd531140, 0x1ec80dbb, 0x50410612, 0x628f320c, + 0x2c0639a5, 0xff9d255e, 0xb1142ef7, 0x46c47ef1, 0x084d7558, + 0xdbd669a3, 0x955f620a, 0xa7915614, 0xe9185dbd, 0x3a834146, + 0x740a4aef, 0x5f1f297a, 0x119622d3, 0xc20d3e28, 0x8c843581, + 0xbe4a019f, 0xf0c30a36, 0x235816cd, 0x6dd11d64, 0x7572d1e7, + 0x3bfbda4e, 0xe860c6b5, 0xa6e9cd1c, 0x9427f902, 0xdaaef2ab, + 0x0935ee50, 0x47bce5f9, 0x6ca9866c, 0x22208dc5, 0xf1bb913e, + 0xbf329a97, 0x8dfcae89, 0xc375a520, 0x10eeb9db, 0x5e67b272, + 0x21a920dd, 0x6f202b74, 0xbcbb378f, 0xf2323c26, 0xc0fc0838, + 0x8e750391, 0x5dee1f6a, 0x136714c3, 0x38727756, 0x76fb7cff, + 0xa5606004, 0xebe96bad, 0xd9275fb3, 0x97ae541a, 0x443548e1, + 0x0abc4348, 0x121f8fcb, 0x5c968462, 0x8f0d9899, 0xc1849330, + 0xf34aa72e, 0xbdc3ac87, 0x6e58b07c, 0x20d1bbd5, 0x0bc4d840, + 0x454dd3e9, 0x96d6cf12, 0xd85fc4bb, 0xea91f0a5, 0xa418fb0c, + 0x7783e7f7, 0x390aec5e, 0x881ec2a9, 0xc697c900, 0x150cd5fb, + 0x5b85de52, 0x694bea4c, 0x27c2e1e5, 0xf459fd1e, 0xbad0f6b7, + 0x91c59522, 0xdf4c9e8b, 0x0cd78270, 0x425e89d9, 0x7090bdc7, + 0x3e19b66e, 0xed82aa95, 0xa30ba13c, 0xbba86dbf, 0xf5216616, + 0x26ba7aed, 0x68337144, 0x5afd455a, 0x14744ef3, 0xc7ef5208, + 0x896659a1, 0xa2733a34, 0xecfa319d, 0x3f612d66, 0x71e826cf, + 0x432612d1, 0x0daf1978, 0xde340583, 0x90bd0e2a, 0xef739c85, + 0xa1fa972c, 0x72618bd7, 0x3ce8807e, 0x0e26b460, 0x40afbfc9, + 0x9334a332, 0xddbda89b, 
0xf6a8cb0e, 0xb821c0a7, 0x6bbadc5c, + 0x2533d7f5, 0x17fde3eb, 0x5974e842, 0x8aeff4b9, 0xc466ff10, + 0xdcc53393, 0x924c383a, 0x41d724c1, 0x0f5e2f68, 0x3d901b76, + 0x731910df, 0xa0820c24, 0xee0b078d, 0xc51e6418, 0x8b976fb1, + 0x580c734a, 0x168578e3, 0x244b4cfd, 0x6ac24754, 0xb9595baf, + 0xf7d05006}, + {0x00000000, 0x8d88fde2, 0xc060fd85, 0x4de80067, 0x5bb0fd4b, + 0xd63800a9, 0x9bd000ce, 0x1658fd2c, 0xb761fa96, 0x3ae90774, + 0x77010713, 0xfa89faf1, 0xecd107dd, 0x6159fa3f, 0x2cb1fa58, + 0xa13907ba, 0xb5b2f36d, 0x383a0e8f, 0x75d20ee8, 0xf85af30a, + 0xee020e26, 0x638af3c4, 0x2e62f3a3, 0xa3ea0e41, 0x02d309fb, + 0x8f5bf419, 0xc2b3f47e, 0x4f3b099c, 0x5963f4b0, 0xd4eb0952, + 0x99030935, 0x148bf4d7, 0xb014e09b, 0x3d9c1d79, 0x70741d1e, + 0xfdfce0fc, 0xeba41dd0, 0x662ce032, 0x2bc4e055, 0xa64c1db7, + 0x07751a0d, 0x8afde7ef, 0xc715e788, 0x4a9d1a6a, 0x5cc5e746, + 0xd14d1aa4, 0x9ca51ac3, 0x112de721, 0x05a613f6, 0x882eee14, + 0xc5c6ee73, 0x484e1391, 0x5e16eebd, 0xd39e135f, 0x9e761338, + 0x13feeeda, 0xb2c7e960, 0x3f4f1482, 0x72a714e5, 0xff2fe907, + 0xe977142b, 0x64ffe9c9, 0x2917e9ae, 0xa49f144c, 0xbb58c777, + 0x36d03a95, 0x7b383af2, 0xf6b0c710, 0xe0e83a3c, 0x6d60c7de, + 0x2088c7b9, 0xad003a5b, 0x0c393de1, 0x81b1c003, 0xcc59c064, + 0x41d13d86, 0x5789c0aa, 0xda013d48, 0x97e93d2f, 0x1a61c0cd, + 0x0eea341a, 0x8362c9f8, 0xce8ac99f, 0x4302347d, 0x555ac951, + 0xd8d234b3, 0x953a34d4, 0x18b2c936, 0xb98bce8c, 0x3403336e, + 0x79eb3309, 0xf463ceeb, 0xe23b33c7, 0x6fb3ce25, 0x225bce42, + 0xafd333a0, 0x0b4c27ec, 0x86c4da0e, 0xcb2cda69, 0x46a4278b, + 0x50fcdaa7, 0xdd742745, 0x909c2722, 0x1d14dac0, 0xbc2ddd7a, + 0x31a52098, 0x7c4d20ff, 0xf1c5dd1d, 0xe79d2031, 0x6a15ddd3, + 0x27fdddb4, 0xaa752056, 0xbefed481, 0x33762963, 0x7e9e2904, + 0xf316d4e6, 0xe54e29ca, 0x68c6d428, 0x252ed44f, 0xa8a629ad, + 0x099f2e17, 0x8417d3f5, 0xc9ffd392, 0x44772e70, 0x522fd35c, + 0xdfa72ebe, 0x924f2ed9, 0x1fc7d33b, 0xadc088af, 0x2048754d, + 0x6da0752a, 0xe02888c8, 0xf67075e4, 0x7bf88806, 0x36108861, + 0xbb987583, 0x1aa17239, 0x97298fdb, 0xdac18fbc, 0x5749725e, + 0x41118f72, 0xcc997290, 0x817172f7, 0x0cf98f15, 0x18727bc2, + 0x95fa8620, 0xd8128647, 0x559a7ba5, 0x43c28689, 0xce4a7b6b, + 0x83a27b0c, 0x0e2a86ee, 0xaf138154, 0x229b7cb6, 0x6f737cd1, + 0xe2fb8133, 0xf4a37c1f, 0x792b81fd, 0x34c3819a, 0xb94b7c78, + 0x1dd46834, 0x905c95d6, 0xddb495b1, 0x503c6853, 0x4664957f, + 0xcbec689d, 0x860468fa, 0x0b8c9518, 0xaab592a2, 0x273d6f40, + 0x6ad56f27, 0xe75d92c5, 0xf1056fe9, 0x7c8d920b, 0x3165926c, + 0xbced6f8e, 0xa8669b59, 0x25ee66bb, 0x680666dc, 0xe58e9b3e, + 0xf3d66612, 0x7e5e9bf0, 0x33b69b97, 0xbe3e6675, 0x1f0761cf, + 0x928f9c2d, 0xdf679c4a, 0x52ef61a8, 0x44b79c84, 0xc93f6166, + 0x84d76101, 0x095f9ce3, 0x16984fd8, 0x9b10b23a, 0xd6f8b25d, + 0x5b704fbf, 0x4d28b293, 0xc0a04f71, 0x8d484f16, 0x00c0b2f4, + 0xa1f9b54e, 0x2c7148ac, 0x619948cb, 0xec11b529, 0xfa494805, + 0x77c1b5e7, 0x3a29b580, 0xb7a14862, 0xa32abcb5, 0x2ea24157, + 0x634a4130, 0xeec2bcd2, 0xf89a41fe, 0x7512bc1c, 0x38fabc7b, + 0xb5724199, 0x144b4623, 0x99c3bbc1, 0xd42bbba6, 0x59a34644, + 0x4ffbbb68, 0xc273468a, 0x8f9b46ed, 0x0213bb0f, 0xa68caf43, + 0x2b0452a1, 0x66ec52c6, 0xeb64af24, 0xfd3c5208, 0x70b4afea, + 0x3d5caf8d, 0xb0d4526f, 0x11ed55d5, 0x9c65a837, 0xd18da850, + 0x5c0555b2, 0x4a5da89e, 0xc7d5557c, 0x8a3d551b, 0x07b5a8f9, + 0x133e5c2e, 0x9eb6a1cc, 0xd35ea1ab, 0x5ed65c49, 0x488ea165, + 0xc5065c87, 0x88ee5ce0, 0x0566a102, 0xa45fa6b8, 0x29d75b5a, + 0x643f5b3d, 0xe9b7a6df, 0xffef5bf3, 0x7267a611, 0x3f8fa676, + 0xb2075b94}, + {0x00000000, 0x80f0171f, 0xda91287f, 0x5a613f60, 0x6e5356bf, + 0xeea341a0, 
0xb4c27ec0, 0x343269df, 0xdca6ad7e, 0x5c56ba61, + 0x06378501, 0x86c7921e, 0xb2f5fbc1, 0x3205ecde, 0x6864d3be, + 0xe894c4a1, 0x623c5cbd, 0xe2cc4ba2, 0xb8ad74c2, 0x385d63dd, + 0x0c6f0a02, 0x8c9f1d1d, 0xd6fe227d, 0x560e3562, 0xbe9af1c3, + 0x3e6ae6dc, 0x640bd9bc, 0xe4fbcea3, 0xd0c9a77c, 0x5039b063, + 0x0a588f03, 0x8aa8981c, 0xc478b97a, 0x4488ae65, 0x1ee99105, + 0x9e19861a, 0xaa2befc5, 0x2adbf8da, 0x70bac7ba, 0xf04ad0a5, + 0x18de1404, 0x982e031b, 0xc24f3c7b, 0x42bf2b64, 0x768d42bb, + 0xf67d55a4, 0xac1c6ac4, 0x2cec7ddb, 0xa644e5c7, 0x26b4f2d8, + 0x7cd5cdb8, 0xfc25daa7, 0xc817b378, 0x48e7a467, 0x12869b07, + 0x92768c18, 0x7ae248b9, 0xfa125fa6, 0xa07360c6, 0x208377d9, + 0x14b11e06, 0x94410919, 0xce203679, 0x4ed02166, 0x538074b5, + 0xd37063aa, 0x89115cca, 0x09e14bd5, 0x3dd3220a, 0xbd233515, + 0xe7420a75, 0x67b21d6a, 0x8f26d9cb, 0x0fd6ced4, 0x55b7f1b4, + 0xd547e6ab, 0xe1758f74, 0x6185986b, 0x3be4a70b, 0xbb14b014, + 0x31bc2808, 0xb14c3f17, 0xeb2d0077, 0x6bdd1768, 0x5fef7eb7, + 0xdf1f69a8, 0x857e56c8, 0x058e41d7, 0xed1a8576, 0x6dea9269, + 0x378bad09, 0xb77bba16, 0x8349d3c9, 0x03b9c4d6, 0x59d8fbb6, + 0xd928eca9, 0x97f8cdcf, 0x1708dad0, 0x4d69e5b0, 0xcd99f2af, + 0xf9ab9b70, 0x795b8c6f, 0x233ab30f, 0xa3caa410, 0x4b5e60b1, + 0xcbae77ae, 0x91cf48ce, 0x113f5fd1, 0x250d360e, 0xa5fd2111, + 0xff9c1e71, 0x7f6c096e, 0xf5c49172, 0x7534866d, 0x2f55b90d, + 0xafa5ae12, 0x9b97c7cd, 0x1b67d0d2, 0x4106efb2, 0xc1f6f8ad, + 0x29623c0c, 0xa9922b13, 0xf3f31473, 0x7303036c, 0x47316ab3, + 0xc7c17dac, 0x9da042cc, 0x1d5055d3, 0xa700e96a, 0x27f0fe75, + 0x7d91c115, 0xfd61d60a, 0xc953bfd5, 0x49a3a8ca, 0x13c297aa, + 0x933280b5, 0x7ba64414, 0xfb56530b, 0xa1376c6b, 0x21c77b74, + 0x15f512ab, 0x950505b4, 0xcf643ad4, 0x4f942dcb, 0xc53cb5d7, + 0x45cca2c8, 0x1fad9da8, 0x9f5d8ab7, 0xab6fe368, 0x2b9ff477, + 0x71fecb17, 0xf10edc08, 0x199a18a9, 0x996a0fb6, 0xc30b30d6, + 0x43fb27c9, 0x77c94e16, 0xf7395909, 0xad586669, 0x2da87176, + 0x63785010, 0xe388470f, 0xb9e9786f, 0x39196f70, 0x0d2b06af, + 0x8ddb11b0, 0xd7ba2ed0, 0x574a39cf, 0xbfdefd6e, 0x3f2eea71, + 0x654fd511, 0xe5bfc20e, 0xd18dabd1, 0x517dbcce, 0x0b1c83ae, + 0x8bec94b1, 0x01440cad, 0x81b41bb2, 0xdbd524d2, 0x5b2533cd, + 0x6f175a12, 0xefe74d0d, 0xb586726d, 0x35766572, 0xdde2a1d3, + 0x5d12b6cc, 0x077389ac, 0x87839eb3, 0xb3b1f76c, 0x3341e073, + 0x6920df13, 0xe9d0c80c, 0xf4809ddf, 0x74708ac0, 0x2e11b5a0, + 0xaee1a2bf, 0x9ad3cb60, 0x1a23dc7f, 0x4042e31f, 0xc0b2f400, + 0x282630a1, 0xa8d627be, 0xf2b718de, 0x72470fc1, 0x4675661e, + 0xc6857101, 0x9ce44e61, 0x1c14597e, 0x96bcc162, 0x164cd67d, + 0x4c2de91d, 0xccddfe02, 0xf8ef97dd, 0x781f80c2, 0x227ebfa2, + 0xa28ea8bd, 0x4a1a6c1c, 0xcaea7b03, 0x908b4463, 0x107b537c, + 0x24493aa3, 0xa4b92dbc, 0xfed812dc, 0x7e2805c3, 0x30f824a5, + 0xb00833ba, 0xea690cda, 0x6a991bc5, 0x5eab721a, 0xde5b6505, + 0x843a5a65, 0x04ca4d7a, 0xec5e89db, 0x6cae9ec4, 0x36cfa1a4, + 0xb63fb6bb, 0x820ddf64, 0x02fdc87b, 0x589cf71b, 0xd86ce004, + 0x52c47818, 0xd2346f07, 0x88555067, 0x08a54778, 0x3c972ea7, + 0xbc6739b8, 0xe60606d8, 0x66f611c7, 0x8e62d566, 0x0e92c279, + 0x54f3fd19, 0xd403ea06, 0xe03183d9, 0x60c194c6, 0x3aa0aba6, + 0xba50bcb9}, + {0x00000000, 0x9570d495, 0xf190af6b, 0x64e07bfe, 0x38505897, + 0xad208c02, 0xc9c0f7fc, 0x5cb02369, 0x70a0b12e, 0xe5d065bb, + 0x81301e45, 0x1440cad0, 0x48f0e9b9, 0xdd803d2c, 0xb96046d2, + 0x2c109247, 0xe141625c, 0x7431b6c9, 0x10d1cd37, 0x85a119a2, + 0xd9113acb, 0x4c61ee5e, 0x288195a0, 0xbdf14135, 0x91e1d372, + 0x049107e7, 0x60717c19, 0xf501a88c, 0xa9b18be5, 0x3cc15f70, + 0x5821248e, 0xcd51f01b, 0x19f3c2f9, 0x8c83166c, 0xe8636d92, + 0x7d13b907, 
0x21a39a6e, 0xb4d34efb, 0xd0333505, 0x4543e190, + 0x695373d7, 0xfc23a742, 0x98c3dcbc, 0x0db30829, 0x51032b40, + 0xc473ffd5, 0xa093842b, 0x35e350be, 0xf8b2a0a5, 0x6dc27430, + 0x09220fce, 0x9c52db5b, 0xc0e2f832, 0x55922ca7, 0x31725759, + 0xa40283cc, 0x8812118b, 0x1d62c51e, 0x7982bee0, 0xecf26a75, + 0xb042491c, 0x25329d89, 0x41d2e677, 0xd4a232e2, 0x33e785f2, + 0xa6975167, 0xc2772a99, 0x5707fe0c, 0x0bb7dd65, 0x9ec709f0, + 0xfa27720e, 0x6f57a69b, 0x434734dc, 0xd637e049, 0xb2d79bb7, + 0x27a74f22, 0x7b176c4b, 0xee67b8de, 0x8a87c320, 0x1ff717b5, + 0xd2a6e7ae, 0x47d6333b, 0x233648c5, 0xb6469c50, 0xeaf6bf39, + 0x7f866bac, 0x1b661052, 0x8e16c4c7, 0xa2065680, 0x37768215, + 0x5396f9eb, 0xc6e62d7e, 0x9a560e17, 0x0f26da82, 0x6bc6a17c, + 0xfeb675e9, 0x2a14470b, 0xbf64939e, 0xdb84e860, 0x4ef43cf5, + 0x12441f9c, 0x8734cb09, 0xe3d4b0f7, 0x76a46462, 0x5ab4f625, + 0xcfc422b0, 0xab24594e, 0x3e548ddb, 0x62e4aeb2, 0xf7947a27, + 0x937401d9, 0x0604d54c, 0xcb552557, 0x5e25f1c2, 0x3ac58a3c, + 0xafb55ea9, 0xf3057dc0, 0x6675a955, 0x0295d2ab, 0x97e5063e, + 0xbbf59479, 0x2e8540ec, 0x4a653b12, 0xdf15ef87, 0x83a5ccee, + 0x16d5187b, 0x72356385, 0xe745b710, 0x67cf0be4, 0xf2bfdf71, + 0x965fa48f, 0x032f701a, 0x5f9f5373, 0xcaef87e6, 0xae0ffc18, + 0x3b7f288d, 0x176fbaca, 0x821f6e5f, 0xe6ff15a1, 0x738fc134, + 0x2f3fe25d, 0xba4f36c8, 0xdeaf4d36, 0x4bdf99a3, 0x868e69b8, + 0x13febd2d, 0x771ec6d3, 0xe26e1246, 0xbede312f, 0x2baee5ba, + 0x4f4e9e44, 0xda3e4ad1, 0xf62ed896, 0x635e0c03, 0x07be77fd, + 0x92cea368, 0xce7e8001, 0x5b0e5494, 0x3fee2f6a, 0xaa9efbff, + 0x7e3cc91d, 0xeb4c1d88, 0x8fac6676, 0x1adcb2e3, 0x466c918a, + 0xd31c451f, 0xb7fc3ee1, 0x228cea74, 0x0e9c7833, 0x9becaca6, + 0xff0cd758, 0x6a7c03cd, 0x36cc20a4, 0xa3bcf431, 0xc75c8fcf, + 0x522c5b5a, 0x9f7dab41, 0x0a0d7fd4, 0x6eed042a, 0xfb9dd0bf, + 0xa72df3d6, 0x325d2743, 0x56bd5cbd, 0xc3cd8828, 0xefdd1a6f, + 0x7aadcefa, 0x1e4db504, 0x8b3d6191, 0xd78d42f8, 0x42fd966d, + 0x261ded93, 0xb36d3906, 0x54288e16, 0xc1585a83, 0xa5b8217d, + 0x30c8f5e8, 0x6c78d681, 0xf9080214, 0x9de879ea, 0x0898ad7f, + 0x24883f38, 0xb1f8ebad, 0xd5189053, 0x406844c6, 0x1cd867af, + 0x89a8b33a, 0xed48c8c4, 0x78381c51, 0xb569ec4a, 0x201938df, + 0x44f94321, 0xd18997b4, 0x8d39b4dd, 0x18496048, 0x7ca91bb6, + 0xe9d9cf23, 0xc5c95d64, 0x50b989f1, 0x3459f20f, 0xa129269a, + 0xfd9905f3, 0x68e9d166, 0x0c09aa98, 0x99797e0d, 0x4ddb4cef, + 0xd8ab987a, 0xbc4be384, 0x293b3711, 0x758b1478, 0xe0fbc0ed, + 0x841bbb13, 0x116b6f86, 0x3d7bfdc1, 0xa80b2954, 0xcceb52aa, + 0x599b863f, 0x052ba556, 0x905b71c3, 0xf4bb0a3d, 0x61cbdea8, + 0xac9a2eb3, 0x39eafa26, 0x5d0a81d8, 0xc87a554d, 0x94ca7624, + 0x01baa2b1, 0x655ad94f, 0xf02a0dda, 0xdc3a9f9d, 0x494a4b08, + 0x2daa30f6, 0xb8dae463, 0xe46ac70a, 0x711a139f, 0x15fa6861, + 0x808abcf4}, + {0x00000000, 0xcf9e17c8, 0x444d29d1, 0x8bd33e19, 0x889a53a2, + 0x4704446a, 0xccd77a73, 0x03496dbb, 0xca45a105, 0x05dbb6cd, + 0x8e0888d4, 0x41969f1c, 0x42dff2a7, 0x8d41e56f, 0x0692db76, + 0xc90cccbe, 0x4ffa444b, 0x80645383, 0x0bb76d9a, 0xc4297a52, + 0xc76017e9, 0x08fe0021, 0x832d3e38, 0x4cb329f0, 0x85bfe54e, + 0x4a21f286, 0xc1f2cc9f, 0x0e6cdb57, 0x0d25b6ec, 0xc2bba124, + 0x49689f3d, 0x86f688f5, 0x9ff48896, 0x506a9f5e, 0xdbb9a147, + 0x1427b68f, 0x176edb34, 0xd8f0ccfc, 0x5323f2e5, 0x9cbde52d, + 0x55b12993, 0x9a2f3e5b, 0x11fc0042, 0xde62178a, 0xdd2b7a31, + 0x12b56df9, 0x996653e0, 0x56f84428, 0xd00eccdd, 0x1f90db15, + 0x9443e50c, 0x5bddf2c4, 0x58949f7f, 0x970a88b7, 0x1cd9b6ae, + 0xd347a166, 0x1a4b6dd8, 0xd5d57a10, 0x5e064409, 0x919853c1, + 0x92d13e7a, 0x5d4f29b2, 0xd69c17ab, 0x19020063, 0xe498176d, + 0x2b0600a5, 
0xa0d53ebc, 0x6f4b2974, 0x6c0244cf, 0xa39c5307, + 0x284f6d1e, 0xe7d17ad6, 0x2eddb668, 0xe143a1a0, 0x6a909fb9, + 0xa50e8871, 0xa647e5ca, 0x69d9f202, 0xe20acc1b, 0x2d94dbd3, + 0xab625326, 0x64fc44ee, 0xef2f7af7, 0x20b16d3f, 0x23f80084, + 0xec66174c, 0x67b52955, 0xa82b3e9d, 0x6127f223, 0xaeb9e5eb, + 0x256adbf2, 0xeaf4cc3a, 0xe9bda181, 0x2623b649, 0xadf08850, + 0x626e9f98, 0x7b6c9ffb, 0xb4f28833, 0x3f21b62a, 0xf0bfa1e2, + 0xf3f6cc59, 0x3c68db91, 0xb7bbe588, 0x7825f240, 0xb1293efe, + 0x7eb72936, 0xf564172f, 0x3afa00e7, 0x39b36d5c, 0xf62d7a94, + 0x7dfe448d, 0xb2605345, 0x3496dbb0, 0xfb08cc78, 0x70dbf261, + 0xbf45e5a9, 0xbc0c8812, 0x73929fda, 0xf841a1c3, 0x37dfb60b, + 0xfed37ab5, 0x314d6d7d, 0xba9e5364, 0x750044ac, 0x76492917, + 0xb9d73edf, 0x320400c6, 0xfd9a170e, 0x1241289b, 0xdddf3f53, + 0x560c014a, 0x99921682, 0x9adb7b39, 0x55456cf1, 0xde9652e8, + 0x11084520, 0xd804899e, 0x179a9e56, 0x9c49a04f, 0x53d7b787, + 0x509eda3c, 0x9f00cdf4, 0x14d3f3ed, 0xdb4de425, 0x5dbb6cd0, + 0x92257b18, 0x19f64501, 0xd66852c9, 0xd5213f72, 0x1abf28ba, + 0x916c16a3, 0x5ef2016b, 0x97fecdd5, 0x5860da1d, 0xd3b3e404, + 0x1c2df3cc, 0x1f649e77, 0xd0fa89bf, 0x5b29b7a6, 0x94b7a06e, + 0x8db5a00d, 0x422bb7c5, 0xc9f889dc, 0x06669e14, 0x052ff3af, + 0xcab1e467, 0x4162da7e, 0x8efccdb6, 0x47f00108, 0x886e16c0, + 0x03bd28d9, 0xcc233f11, 0xcf6a52aa, 0x00f44562, 0x8b277b7b, + 0x44b96cb3, 0xc24fe446, 0x0dd1f38e, 0x8602cd97, 0x499cda5f, + 0x4ad5b7e4, 0x854ba02c, 0x0e989e35, 0xc10689fd, 0x080a4543, + 0xc794528b, 0x4c476c92, 0x83d97b5a, 0x809016e1, 0x4f0e0129, + 0xc4dd3f30, 0x0b4328f8, 0xf6d93ff6, 0x3947283e, 0xb2941627, + 0x7d0a01ef, 0x7e436c54, 0xb1dd7b9c, 0x3a0e4585, 0xf590524d, + 0x3c9c9ef3, 0xf302893b, 0x78d1b722, 0xb74fa0ea, 0xb406cd51, + 0x7b98da99, 0xf04be480, 0x3fd5f348, 0xb9237bbd, 0x76bd6c75, + 0xfd6e526c, 0x32f045a4, 0x31b9281f, 0xfe273fd7, 0x75f401ce, + 0xba6a1606, 0x7366dab8, 0xbcf8cd70, 0x372bf369, 0xf8b5e4a1, + 0xfbfc891a, 0x34629ed2, 0xbfb1a0cb, 0x702fb703, 0x692db760, + 0xa6b3a0a8, 0x2d609eb1, 0xe2fe8979, 0xe1b7e4c2, 0x2e29f30a, + 0xa5facd13, 0x6a64dadb, 0xa3681665, 0x6cf601ad, 0xe7253fb4, + 0x28bb287c, 0x2bf245c7, 0xe46c520f, 0x6fbf6c16, 0xa0217bde, + 0x26d7f32b, 0xe949e4e3, 0x629adafa, 0xad04cd32, 0xae4da089, + 0x61d3b741, 0xea008958, 0x259e9e90, 0xec92522e, 0x230c45e6, + 0xa8df7bff, 0x67416c37, 0x6408018c, 0xab961644, 0x2045285d, + 0xefdb3f95}, + {0x00000000, 0x24825136, 0x4904a26c, 0x6d86f35a, 0x920944d8, + 0xb68b15ee, 0xdb0de6b4, 0xff8fb782, 0xff638ff1, 0xdbe1dec7, + 0xb6672d9d, 0x92e57cab, 0x6d6acb29, 0x49e89a1f, 0x246e6945, + 0x00ec3873, 0x25b619a3, 0x01344895, 0x6cb2bbcf, 0x4830eaf9, + 0xb7bf5d7b, 0x933d0c4d, 0xfebbff17, 0xda39ae21, 0xdad59652, + 0xfe57c764, 0x93d1343e, 0xb7536508, 0x48dcd28a, 0x6c5e83bc, + 0x01d870e6, 0x255a21d0, 0x4b6c3346, 0x6fee6270, 0x0268912a, + 0x26eac01c, 0xd965779e, 0xfde726a8, 0x9061d5f2, 0xb4e384c4, + 0xb40fbcb7, 0x908ded81, 0xfd0b1edb, 0xd9894fed, 0x2606f86f, + 0x0284a959, 0x6f025a03, 0x4b800b35, 0x6eda2ae5, 0x4a587bd3, + 0x27de8889, 0x035cd9bf, 0xfcd36e3d, 0xd8513f0b, 0xb5d7cc51, + 0x91559d67, 0x91b9a514, 0xb53bf422, 0xd8bd0778, 0xfc3f564e, + 0x03b0e1cc, 0x2732b0fa, 0x4ab443a0, 0x6e361296, 0x96d8668c, + 0xb25a37ba, 0xdfdcc4e0, 0xfb5e95d6, 0x04d12254, 0x20537362, + 0x4dd58038, 0x6957d10e, 0x69bbe97d, 0x4d39b84b, 0x20bf4b11, + 0x043d1a27, 0xfbb2ada5, 0xdf30fc93, 0xb2b60fc9, 0x96345eff, + 0xb36e7f2f, 0x97ec2e19, 0xfa6add43, 0xdee88c75, 0x21673bf7, + 0x05e56ac1, 0x6863999b, 0x4ce1c8ad, 0x4c0df0de, 0x688fa1e8, + 0x050952b2, 0x218b0384, 0xde04b406, 0xfa86e530, 0x9700166a, + 0xb382475c, 
0xddb455ca, 0xf93604fc, 0x94b0f7a6, 0xb032a690, + 0x4fbd1112, 0x6b3f4024, 0x06b9b37e, 0x223be248, 0x22d7da3b, + 0x06558b0d, 0x6bd37857, 0x4f512961, 0xb0de9ee3, 0x945ccfd5, + 0xf9da3c8f, 0xdd586db9, 0xf8024c69, 0xdc801d5f, 0xb106ee05, + 0x9584bf33, 0x6a0b08b1, 0x4e895987, 0x230faadd, 0x078dfbeb, + 0x0761c398, 0x23e392ae, 0x4e6561f4, 0x6ae730c2, 0x95688740, + 0xb1ead676, 0xdc6c252c, 0xf8ee741a, 0xf6c1cb59, 0xd2439a6f, + 0xbfc56935, 0x9b473803, 0x64c88f81, 0x404adeb7, 0x2dcc2ded, + 0x094e7cdb, 0x09a244a8, 0x2d20159e, 0x40a6e6c4, 0x6424b7f2, + 0x9bab0070, 0xbf295146, 0xd2afa21c, 0xf62df32a, 0xd377d2fa, + 0xf7f583cc, 0x9a737096, 0xbef121a0, 0x417e9622, 0x65fcc714, + 0x087a344e, 0x2cf86578, 0x2c145d0b, 0x08960c3d, 0x6510ff67, + 0x4192ae51, 0xbe1d19d3, 0x9a9f48e5, 0xf719bbbf, 0xd39bea89, + 0xbdadf81f, 0x992fa929, 0xf4a95a73, 0xd02b0b45, 0x2fa4bcc7, + 0x0b26edf1, 0x66a01eab, 0x42224f9d, 0x42ce77ee, 0x664c26d8, + 0x0bcad582, 0x2f4884b4, 0xd0c73336, 0xf4456200, 0x99c3915a, + 0xbd41c06c, 0x981be1bc, 0xbc99b08a, 0xd11f43d0, 0xf59d12e6, + 0x0a12a564, 0x2e90f452, 0x43160708, 0x6794563e, 0x67786e4d, + 0x43fa3f7b, 0x2e7ccc21, 0x0afe9d17, 0xf5712a95, 0xd1f37ba3, + 0xbc7588f9, 0x98f7d9cf, 0x6019add5, 0x449bfce3, 0x291d0fb9, + 0x0d9f5e8f, 0xf210e90d, 0xd692b83b, 0xbb144b61, 0x9f961a57, + 0x9f7a2224, 0xbbf87312, 0xd67e8048, 0xf2fcd17e, 0x0d7366fc, + 0x29f137ca, 0x4477c490, 0x60f595a6, 0x45afb476, 0x612de540, + 0x0cab161a, 0x2829472c, 0xd7a6f0ae, 0xf324a198, 0x9ea252c2, + 0xba2003f4, 0xbacc3b87, 0x9e4e6ab1, 0xf3c899eb, 0xd74ac8dd, + 0x28c57f5f, 0x0c472e69, 0x61c1dd33, 0x45438c05, 0x2b759e93, + 0x0ff7cfa5, 0x62713cff, 0x46f36dc9, 0xb97cda4b, 0x9dfe8b7d, + 0xf0787827, 0xd4fa2911, 0xd4161162, 0xf0944054, 0x9d12b30e, + 0xb990e238, 0x461f55ba, 0x629d048c, 0x0f1bf7d6, 0x2b99a6e0, + 0x0ec38730, 0x2a41d606, 0x47c7255c, 0x6345746a, 0x9ccac3e8, + 0xb84892de, 0xd5ce6184, 0xf14c30b2, 0xf1a008c1, 0xd52259f7, + 0xb8a4aaad, 0x9c26fb9b, 0x63a94c19, 0x472b1d2f, 0x2aadee75, + 0x0e2fbf43}, + {0x00000000, 0x36f290f3, 0x6de521e6, 0x5b17b115, 0xdbca43cc, + 0xed38d33f, 0xb62f622a, 0x80ddf2d9, 0x6ce581d9, 0x5a17112a, + 0x0100a03f, 0x37f230cc, 0xb72fc215, 0x81dd52e6, 0xdacae3f3, + 0xec387300, 0xd9cb03b2, 0xef399341, 0xb42e2254, 0x82dcb2a7, + 0x0201407e, 0x34f3d08d, 0x6fe46198, 0x5916f16b, 0xb52e826b, + 0x83dc1298, 0xd8cba38d, 0xee39337e, 0x6ee4c1a7, 0x58165154, + 0x0301e041, 0x35f370b2, 0x68e70125, 0x5e1591d6, 0x050220c3, + 0x33f0b030, 0xb32d42e9, 0x85dfd21a, 0xdec8630f, 0xe83af3fc, + 0x040280fc, 0x32f0100f, 0x69e7a11a, 0x5f1531e9, 0xdfc8c330, + 0xe93a53c3, 0xb22de2d6, 0x84df7225, 0xb12c0297, 0x87de9264, + 0xdcc92371, 0xea3bb382, 0x6ae6415b, 0x5c14d1a8, 0x070360bd, + 0x31f1f04e, 0xddc9834e, 0xeb3b13bd, 0xb02ca2a8, 0x86de325b, + 0x0603c082, 0x30f15071, 0x6be6e164, 0x5d147197, 0xd1ce024a, + 0xe73c92b9, 0xbc2b23ac, 0x8ad9b35f, 0x0a044186, 0x3cf6d175, + 0x67e16060, 0x5113f093, 0xbd2b8393, 0x8bd91360, 0xd0cea275, + 0xe63c3286, 0x66e1c05f, 0x501350ac, 0x0b04e1b9, 0x3df6714a, + 0x080501f8, 0x3ef7910b, 0x65e0201e, 0x5312b0ed, 0xd3cf4234, + 0xe53dd2c7, 0xbe2a63d2, 0x88d8f321, 0x64e08021, 0x521210d2, + 0x0905a1c7, 0x3ff73134, 0xbf2ac3ed, 0x89d8531e, 0xd2cfe20b, + 0xe43d72f8, 0xb929036f, 0x8fdb939c, 0xd4cc2289, 0xe23eb27a, + 0x62e340a3, 0x5411d050, 0x0f066145, 0x39f4f1b6, 0xd5cc82b6, + 0xe33e1245, 0xb829a350, 0x8edb33a3, 0x0e06c17a, 0x38f45189, + 0x63e3e09c, 0x5511706f, 0x60e200dd, 0x5610902e, 0x0d07213b, + 0x3bf5b1c8, 0xbb284311, 0x8ddad3e2, 0xd6cd62f7, 0xe03ff204, + 0x0c078104, 0x3af511f7, 0x61e2a0e2, 0x57103011, 0xd7cdc2c8, + 0xe13f523b, 
0xba28e32e, 0x8cda73dd, 0x78ed02d5, 0x4e1f9226, + 0x15082333, 0x23fab3c0, 0xa3274119, 0x95d5d1ea, 0xcec260ff, + 0xf830f00c, 0x1408830c, 0x22fa13ff, 0x79eda2ea, 0x4f1f3219, + 0xcfc2c0c0, 0xf9305033, 0xa227e126, 0x94d571d5, 0xa1260167, + 0x97d49194, 0xccc32081, 0xfa31b072, 0x7aec42ab, 0x4c1ed258, + 0x1709634d, 0x21fbf3be, 0xcdc380be, 0xfb31104d, 0xa026a158, + 0x96d431ab, 0x1609c372, 0x20fb5381, 0x7bece294, 0x4d1e7267, + 0x100a03f0, 0x26f89303, 0x7def2216, 0x4b1db2e5, 0xcbc0403c, + 0xfd32d0cf, 0xa62561da, 0x90d7f129, 0x7cef8229, 0x4a1d12da, + 0x110aa3cf, 0x27f8333c, 0xa725c1e5, 0x91d75116, 0xcac0e003, + 0xfc3270f0, 0xc9c10042, 0xff3390b1, 0xa42421a4, 0x92d6b157, + 0x120b438e, 0x24f9d37d, 0x7fee6268, 0x491cf29b, 0xa524819b, + 0x93d61168, 0xc8c1a07d, 0xfe33308e, 0x7eeec257, 0x481c52a4, + 0x130be3b1, 0x25f97342, 0xa923009f, 0x9fd1906c, 0xc4c62179, + 0xf234b18a, 0x72e94353, 0x441bd3a0, 0x1f0c62b5, 0x29fef246, + 0xc5c68146, 0xf33411b5, 0xa823a0a0, 0x9ed13053, 0x1e0cc28a, + 0x28fe5279, 0x73e9e36c, 0x451b739f, 0x70e8032d, 0x461a93de, + 0x1d0d22cb, 0x2bffb238, 0xab2240e1, 0x9dd0d012, 0xc6c76107, + 0xf035f1f4, 0x1c0d82f4, 0x2aff1207, 0x71e8a312, 0x471a33e1, + 0xc7c7c138, 0xf13551cb, 0xaa22e0de, 0x9cd0702d, 0xc1c401ba, + 0xf7369149, 0xac21205c, 0x9ad3b0af, 0x1a0e4276, 0x2cfcd285, + 0x77eb6390, 0x4119f363, 0xad218063, 0x9bd31090, 0xc0c4a185, + 0xf6363176, 0x76ebc3af, 0x4019535c, 0x1b0ee249, 0x2dfc72ba, + 0x180f0208, 0x2efd92fb, 0x75ea23ee, 0x4318b31d, 0xc3c541c4, + 0xf537d137, 0xae206022, 0x98d2f0d1, 0x74ea83d1, 0x42181322, + 0x190fa237, 0x2ffd32c4, 0xaf20c01d, 0x99d250ee, 0xc2c5e1fb, + 0xf4377108}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xf390f23600000000, 0xe621e56d00000000, + 0x15b1175b00000000, 0xcc43cadb00000000, 0x3fd338ed00000000, + 0x2a622fb600000000, 0xd9f2dd8000000000, 0xd981e56c00000000, + 0x2a11175a00000000, 0x3fa0000100000000, 0xcc30f23700000000, + 0x15c22fb700000000, 0xe652dd8100000000, 0xf3e3cada00000000, + 0x007338ec00000000, 0xb203cbd900000000, 0x419339ef00000000, + 0x54222eb400000000, 0xa7b2dc8200000000, 0x7e40010200000000, + 0x8dd0f33400000000, 0x9861e46f00000000, 0x6bf1165900000000, + 0x6b822eb500000000, 0x9812dc8300000000, 0x8da3cbd800000000, + 0x7e3339ee00000000, 0xa7c1e46e00000000, 0x5451165800000000, + 0x41e0010300000000, 0xb270f33500000000, 0x2501e76800000000, + 0xd691155e00000000, 0xc320020500000000, 0x30b0f03300000000, + 0xe9422db300000000, 0x1ad2df8500000000, 0x0f63c8de00000000, + 0xfcf33ae800000000, 0xfc80020400000000, 0x0f10f03200000000, + 0x1aa1e76900000000, 0xe931155f00000000, 0x30c3c8df00000000, + 0xc3533ae900000000, 0xd6e22db200000000, 0x2572df8400000000, + 0x97022cb100000000, 0x6492de8700000000, 0x7123c9dc00000000, + 0x82b33bea00000000, 0x5b41e66a00000000, 0xa8d1145c00000000, + 0xbd60030700000000, 0x4ef0f13100000000, 0x4e83c9dd00000000, + 0xbd133beb00000000, 0xa8a22cb000000000, 0x5b32de8600000000, + 0x82c0030600000000, 0x7150f13000000000, 0x64e1e66b00000000, + 0x9771145d00000000, 0x4a02ced100000000, 0xb9923ce700000000, + 0xac232bbc00000000, 0x5fb3d98a00000000, 0x8641040a00000000, + 0x75d1f63c00000000, 0x6060e16700000000, 0x93f0135100000000, + 0x93832bbd00000000, 0x6013d98b00000000, 0x75a2ced000000000, + 0x86323ce600000000, 0x5fc0e16600000000, 0xac50135000000000, + 0xb9e1040b00000000, 0x4a71f63d00000000, 0xf801050800000000, + 0x0b91f73e00000000, 0x1e20e06500000000, 0xedb0125300000000, + 0x3442cfd300000000, 0xc7d23de500000000, 0xd2632abe00000000, + 0x21f3d88800000000, 0x2180e06400000000, 0xd210125200000000, + 
0xc7a1050900000000, 0x3431f73f00000000, 0xedc32abf00000000, + 0x1e53d88900000000, 0x0be2cfd200000000, 0xf8723de400000000, + 0x6f0329b900000000, 0x9c93db8f00000000, 0x8922ccd400000000, + 0x7ab23ee200000000, 0xa340e36200000000, 0x50d0115400000000, + 0x4561060f00000000, 0xb6f1f43900000000, 0xb682ccd500000000, + 0x45123ee300000000, 0x50a329b800000000, 0xa333db8e00000000, + 0x7ac1060e00000000, 0x8951f43800000000, 0x9ce0e36300000000, + 0x6f70115500000000, 0xdd00e26000000000, 0x2e90105600000000, + 0x3b21070d00000000, 0xc8b1f53b00000000, 0x114328bb00000000, + 0xe2d3da8d00000000, 0xf762cdd600000000, 0x04f23fe000000000, + 0x0481070c00000000, 0xf711f53a00000000, 0xe2a0e26100000000, + 0x1130105700000000, 0xc8c2cdd700000000, 0x3b523fe100000000, + 0x2ee328ba00000000, 0xdd73da8c00000000, 0xd502ed7800000000, + 0x26921f4e00000000, 0x3323081500000000, 0xc0b3fa2300000000, + 0x194127a300000000, 0xead1d59500000000, 0xff60c2ce00000000, + 0x0cf030f800000000, 0x0c83081400000000, 0xff13fa2200000000, + 0xeaa2ed7900000000, 0x19321f4f00000000, 0xc0c0c2cf00000000, + 0x335030f900000000, 0x26e127a200000000, 0xd571d59400000000, + 0x670126a100000000, 0x9491d49700000000, 0x8120c3cc00000000, + 0x72b031fa00000000, 0xab42ec7a00000000, 0x58d21e4c00000000, + 0x4d63091700000000, 0xbef3fb2100000000, 0xbe80c3cd00000000, + 0x4d1031fb00000000, 0x58a126a000000000, 0xab31d49600000000, + 0x72c3091600000000, 0x8153fb2000000000, 0x94e2ec7b00000000, + 0x67721e4d00000000, 0xf0030a1000000000, 0x0393f82600000000, + 0x1622ef7d00000000, 0xe5b21d4b00000000, 0x3c40c0cb00000000, + 0xcfd032fd00000000, 0xda6125a600000000, 0x29f1d79000000000, + 0x2982ef7c00000000, 0xda121d4a00000000, 0xcfa30a1100000000, + 0x3c33f82700000000, 0xe5c125a700000000, 0x1651d79100000000, + 0x03e0c0ca00000000, 0xf07032fc00000000, 0x4200c1c900000000, + 0xb19033ff00000000, 0xa42124a400000000, 0x57b1d69200000000, + 0x8e430b1200000000, 0x7dd3f92400000000, 0x6862ee7f00000000, + 0x9bf21c4900000000, 0x9b8124a500000000, 0x6811d69300000000, + 0x7da0c1c800000000, 0x8e3033fe00000000, 0x57c2ee7e00000000, + 0xa4521c4800000000, 0xb1e30b1300000000, 0x4273f92500000000, + 0x9f0023a900000000, 0x6c90d19f00000000, 0x7921c6c400000000, + 0x8ab134f200000000, 0x5343e97200000000, 0xa0d31b4400000000, + 0xb5620c1f00000000, 0x46f2fe2900000000, 0x4681c6c500000000, + 0xb51134f300000000, 0xa0a023a800000000, 0x5330d19e00000000, + 0x8ac20c1e00000000, 0x7952fe2800000000, 0x6ce3e97300000000, + 0x9f731b4500000000, 0x2d03e87000000000, 0xde931a4600000000, + 0xcb220d1d00000000, 0x38b2ff2b00000000, 0xe14022ab00000000, + 0x12d0d09d00000000, 0x0761c7c600000000, 0xf4f135f000000000, + 0xf4820d1c00000000, 0x0712ff2a00000000, 0x12a3e87100000000, + 0xe1331a4700000000, 0x38c1c7c700000000, 0xcb5135f100000000, + 0xdee022aa00000000, 0x2d70d09c00000000, 0xba01c4c100000000, + 0x499136f700000000, 0x5c2021ac00000000, 0xafb0d39a00000000, + 0x76420e1a00000000, 0x85d2fc2c00000000, 0x9063eb7700000000, + 0x63f3194100000000, 0x638021ad00000000, 0x9010d39b00000000, + 0x85a1c4c000000000, 0x763136f600000000, 0xafc3eb7600000000, + 0x5c53194000000000, 0x49e20e1b00000000, 0xba72fc2d00000000, + 0x08020f1800000000, 0xfb92fd2e00000000, 0xee23ea7500000000, + 0x1db3184300000000, 0xc441c5c300000000, 0x37d137f500000000, + 0x226020ae00000000, 0xd1f0d29800000000, 0xd183ea7400000000, + 0x2213184200000000, 0x37a20f1900000000, 0xc432fd2f00000000, + 0x1dc020af00000000, 0xee50d29900000000, 0xfbe1c5c200000000, + 0x087137f400000000}, + {0x0000000000000000, 0x3651822400000000, 0x6ca2044900000000, + 0x5af3866d00000000, 0xd844099200000000, 
0xee158bb600000000, + 0xb4e60ddb00000000, 0x82b78fff00000000, 0xf18f63ff00000000, + 0xc7dee1db00000000, 0x9d2d67b600000000, 0xab7ce59200000000, + 0x29cb6a6d00000000, 0x1f9ae84900000000, 0x45696e2400000000, + 0x7338ec0000000000, 0xa319b62500000000, 0x9548340100000000, + 0xcfbbb26c00000000, 0xf9ea304800000000, 0x7b5dbfb700000000, + 0x4d0c3d9300000000, 0x17ffbbfe00000000, 0x21ae39da00000000, + 0x5296d5da00000000, 0x64c757fe00000000, 0x3e34d19300000000, + 0x086553b700000000, 0x8ad2dc4800000000, 0xbc835e6c00000000, + 0xe670d80100000000, 0xd0215a2500000000, 0x46336c4b00000000, + 0x7062ee6f00000000, 0x2a91680200000000, 0x1cc0ea2600000000, + 0x9e7765d900000000, 0xa826e7fd00000000, 0xf2d5619000000000, + 0xc484e3b400000000, 0xb7bc0fb400000000, 0x81ed8d9000000000, + 0xdb1e0bfd00000000, 0xed4f89d900000000, 0x6ff8062600000000, + 0x59a9840200000000, 0x035a026f00000000, 0x350b804b00000000, + 0xe52ada6e00000000, 0xd37b584a00000000, 0x8988de2700000000, + 0xbfd95c0300000000, 0x3d6ed3fc00000000, 0x0b3f51d800000000, + 0x51ccd7b500000000, 0x679d559100000000, 0x14a5b99100000000, + 0x22f43bb500000000, 0x7807bdd800000000, 0x4e563ffc00000000, + 0xcce1b00300000000, 0xfab0322700000000, 0xa043b44a00000000, + 0x9612366e00000000, 0x8c66d89600000000, 0xba375ab200000000, + 0xe0c4dcdf00000000, 0xd6955efb00000000, 0x5422d10400000000, + 0x6273532000000000, 0x3880d54d00000000, 0x0ed1576900000000, + 0x7de9bb6900000000, 0x4bb8394d00000000, 0x114bbf2000000000, + 0x271a3d0400000000, 0xa5adb2fb00000000, 0x93fc30df00000000, + 0xc90fb6b200000000, 0xff5e349600000000, 0x2f7f6eb300000000, + 0x192eec9700000000, 0x43dd6afa00000000, 0x758ce8de00000000, + 0xf73b672100000000, 0xc16ae50500000000, 0x9b99636800000000, + 0xadc8e14c00000000, 0xdef00d4c00000000, 0xe8a18f6800000000, + 0xb252090500000000, 0x84038b2100000000, 0x06b404de00000000, + 0x30e586fa00000000, 0x6a16009700000000, 0x5c4782b300000000, + 0xca55b4dd00000000, 0xfc0436f900000000, 0xa6f7b09400000000, + 0x90a632b000000000, 0x1211bd4f00000000, 0x24403f6b00000000, + 0x7eb3b90600000000, 0x48e23b2200000000, 0x3bdad72200000000, + 0x0d8b550600000000, 0x5778d36b00000000, 0x6129514f00000000, + 0xe39edeb000000000, 0xd5cf5c9400000000, 0x8f3cdaf900000000, + 0xb96d58dd00000000, 0x694c02f800000000, 0x5f1d80dc00000000, + 0x05ee06b100000000, 0x33bf849500000000, 0xb1080b6a00000000, + 0x8759894e00000000, 0xddaa0f2300000000, 0xebfb8d0700000000, + 0x98c3610700000000, 0xae92e32300000000, 0xf461654e00000000, + 0xc230e76a00000000, 0x4087689500000000, 0x76d6eab100000000, + 0x2c256cdc00000000, 0x1a74eef800000000, 0x59cbc1f600000000, + 0x6f9a43d200000000, 0x3569c5bf00000000, 0x0338479b00000000, + 0x818fc86400000000, 0xb7de4a4000000000, 0xed2dcc2d00000000, + 0xdb7c4e0900000000, 0xa844a20900000000, 0x9e15202d00000000, + 0xc4e6a64000000000, 0xf2b7246400000000, 0x7000ab9b00000000, + 0x465129bf00000000, 0x1ca2afd200000000, 0x2af32df600000000, + 0xfad277d300000000, 0xcc83f5f700000000, 0x9670739a00000000, + 0xa021f1be00000000, 0x22967e4100000000, 0x14c7fc6500000000, + 0x4e347a0800000000, 0x7865f82c00000000, 0x0b5d142c00000000, + 0x3d0c960800000000, 0x67ff106500000000, 0x51ae924100000000, + 0xd3191dbe00000000, 0xe5489f9a00000000, 0xbfbb19f700000000, + 0x89ea9bd300000000, 0x1ff8adbd00000000, 0x29a92f9900000000, + 0x735aa9f400000000, 0x450b2bd000000000, 0xc7bca42f00000000, + 0xf1ed260b00000000, 0xab1ea06600000000, 0x9d4f224200000000, + 0xee77ce4200000000, 0xd8264c6600000000, 0x82d5ca0b00000000, + 0xb484482f00000000, 0x3633c7d000000000, 0x006245f400000000, + 0x5a91c39900000000, 0x6cc041bd00000000, 0xbce11b9800000000, 
+ 0x8ab099bc00000000, 0xd0431fd100000000, 0xe6129df500000000, + 0x64a5120a00000000, 0x52f4902e00000000, 0x0807164300000000, + 0x3e56946700000000, 0x4d6e786700000000, 0x7b3ffa4300000000, + 0x21cc7c2e00000000, 0x179dfe0a00000000, 0x952a71f500000000, + 0xa37bf3d100000000, 0xf98875bc00000000, 0xcfd9f79800000000, + 0xd5ad196000000000, 0xe3fc9b4400000000, 0xb90f1d2900000000, + 0x8f5e9f0d00000000, 0x0de910f200000000, 0x3bb892d600000000, + 0x614b14bb00000000, 0x571a969f00000000, 0x24227a9f00000000, + 0x1273f8bb00000000, 0x48807ed600000000, 0x7ed1fcf200000000, + 0xfc66730d00000000, 0xca37f12900000000, 0x90c4774400000000, + 0xa695f56000000000, 0x76b4af4500000000, 0x40e52d6100000000, + 0x1a16ab0c00000000, 0x2c47292800000000, 0xaef0a6d700000000, + 0x98a124f300000000, 0xc252a29e00000000, 0xf40320ba00000000, + 0x873bccba00000000, 0xb16a4e9e00000000, 0xeb99c8f300000000, + 0xddc84ad700000000, 0x5f7fc52800000000, 0x692e470c00000000, + 0x33ddc16100000000, 0x058c434500000000, 0x939e752b00000000, + 0xa5cff70f00000000, 0xff3c716200000000, 0xc96df34600000000, + 0x4bda7cb900000000, 0x7d8bfe9d00000000, 0x277878f000000000, + 0x1129fad400000000, 0x621116d400000000, 0x544094f000000000, + 0x0eb3129d00000000, 0x38e290b900000000, 0xba551f4600000000, + 0x8c049d6200000000, 0xd6f71b0f00000000, 0xe0a6992b00000000, + 0x3087c30e00000000, 0x06d6412a00000000, 0x5c25c74700000000, + 0x6a74456300000000, 0xe8c3ca9c00000000, 0xde9248b800000000, + 0x8461ced500000000, 0xb2304cf100000000, 0xc108a0f100000000, + 0xf75922d500000000, 0xadaaa4b800000000, 0x9bfb269c00000000, + 0x194ca96300000000, 0x2f1d2b4700000000, 0x75eead2a00000000, + 0x43bf2f0e00000000}, + {0x0000000000000000, 0xc8179ecf00000000, 0xd1294d4400000000, + 0x193ed38b00000000, 0xa2539a8800000000, 0x6a44044700000000, + 0x737ad7cc00000000, 0xbb6d490300000000, 0x05a145ca00000000, + 0xcdb6db0500000000, 0xd488088e00000000, 0x1c9f964100000000, + 0xa7f2df4200000000, 0x6fe5418d00000000, 0x76db920600000000, + 0xbecc0cc900000000, 0x4b44fa4f00000000, 0x8353648000000000, + 0x9a6db70b00000000, 0x527a29c400000000, 0xe91760c700000000, + 0x2100fe0800000000, 0x383e2d8300000000, 0xf029b34c00000000, + 0x4ee5bf8500000000, 0x86f2214a00000000, 0x9fccf2c100000000, + 0x57db6c0e00000000, 0xecb6250d00000000, 0x24a1bbc200000000, + 0x3d9f684900000000, 0xf588f68600000000, 0x9688f49f00000000, + 0x5e9f6a5000000000, 0x47a1b9db00000000, 0x8fb6271400000000, + 0x34db6e1700000000, 0xfcccf0d800000000, 0xe5f2235300000000, + 0x2de5bd9c00000000, 0x9329b15500000000, 0x5b3e2f9a00000000, + 0x4200fc1100000000, 0x8a1762de00000000, 0x317a2bdd00000000, + 0xf96db51200000000, 0xe053669900000000, 0x2844f85600000000, + 0xddcc0ed000000000, 0x15db901f00000000, 0x0ce5439400000000, + 0xc4f2dd5b00000000, 0x7f9f945800000000, 0xb7880a9700000000, + 0xaeb6d91c00000000, 0x66a147d300000000, 0xd86d4b1a00000000, + 0x107ad5d500000000, 0x0944065e00000000, 0xc153989100000000, + 0x7a3ed19200000000, 0xb2294f5d00000000, 0xab179cd600000000, + 0x6300021900000000, 0x6d1798e400000000, 0xa500062b00000000, + 0xbc3ed5a000000000, 0x74294b6f00000000, 0xcf44026c00000000, + 0x07539ca300000000, 0x1e6d4f2800000000, 0xd67ad1e700000000, + 0x68b6dd2e00000000, 0xa0a143e100000000, 0xb99f906a00000000, + 0x71880ea500000000, 0xcae547a600000000, 0x02f2d96900000000, + 0x1bcc0ae200000000, 0xd3db942d00000000, 0x265362ab00000000, + 0xee44fc6400000000, 0xf77a2fef00000000, 0x3f6db12000000000, + 0x8400f82300000000, 0x4c1766ec00000000, 0x5529b56700000000, + 0x9d3e2ba800000000, 0x23f2276100000000, 0xebe5b9ae00000000, + 0xf2db6a2500000000, 0x3accf4ea00000000, 
0x81a1bde900000000, + 0x49b6232600000000, 0x5088f0ad00000000, 0x989f6e6200000000, + 0xfb9f6c7b00000000, 0x3388f2b400000000, 0x2ab6213f00000000, + 0xe2a1bff000000000, 0x59ccf6f300000000, 0x91db683c00000000, + 0x88e5bbb700000000, 0x40f2257800000000, 0xfe3e29b100000000, + 0x3629b77e00000000, 0x2f1764f500000000, 0xe700fa3a00000000, + 0x5c6db33900000000, 0x947a2df600000000, 0x8d44fe7d00000000, + 0x455360b200000000, 0xb0db963400000000, 0x78cc08fb00000000, + 0x61f2db7000000000, 0xa9e545bf00000000, 0x12880cbc00000000, + 0xda9f927300000000, 0xc3a141f800000000, 0x0bb6df3700000000, + 0xb57ad3fe00000000, 0x7d6d4d3100000000, 0x64539eba00000000, + 0xac44007500000000, 0x1729497600000000, 0xdf3ed7b900000000, + 0xc600043200000000, 0x0e179afd00000000, 0x9b28411200000000, + 0x533fdfdd00000000, 0x4a010c5600000000, 0x8216929900000000, + 0x397bdb9a00000000, 0xf16c455500000000, 0xe85296de00000000, + 0x2045081100000000, 0x9e8904d800000000, 0x569e9a1700000000, + 0x4fa0499c00000000, 0x87b7d75300000000, 0x3cda9e5000000000, + 0xf4cd009f00000000, 0xedf3d31400000000, 0x25e44ddb00000000, + 0xd06cbb5d00000000, 0x187b259200000000, 0x0145f61900000000, + 0xc95268d600000000, 0x723f21d500000000, 0xba28bf1a00000000, + 0xa3166c9100000000, 0x6b01f25e00000000, 0xd5cdfe9700000000, + 0x1dda605800000000, 0x04e4b3d300000000, 0xccf32d1c00000000, + 0x779e641f00000000, 0xbf89fad000000000, 0xa6b7295b00000000, + 0x6ea0b79400000000, 0x0da0b58d00000000, 0xc5b72b4200000000, + 0xdc89f8c900000000, 0x149e660600000000, 0xaff32f0500000000, + 0x67e4b1ca00000000, 0x7eda624100000000, 0xb6cdfc8e00000000, + 0x0801f04700000000, 0xc0166e8800000000, 0xd928bd0300000000, + 0x113f23cc00000000, 0xaa526acf00000000, 0x6245f40000000000, + 0x7b7b278b00000000, 0xb36cb94400000000, 0x46e44fc200000000, + 0x8ef3d10d00000000, 0x97cd028600000000, 0x5fda9c4900000000, + 0xe4b7d54a00000000, 0x2ca04b8500000000, 0x359e980e00000000, + 0xfd8906c100000000, 0x43450a0800000000, 0x8b5294c700000000, + 0x926c474c00000000, 0x5a7bd98300000000, 0xe116908000000000, + 0x29010e4f00000000, 0x303fddc400000000, 0xf828430b00000000, + 0xf63fd9f600000000, 0x3e28473900000000, 0x271694b200000000, + 0xef010a7d00000000, 0x546c437e00000000, 0x9c7bddb100000000, + 0x85450e3a00000000, 0x4d5290f500000000, 0xf39e9c3c00000000, + 0x3b8902f300000000, 0x22b7d17800000000, 0xeaa04fb700000000, + 0x51cd06b400000000, 0x99da987b00000000, 0x80e44bf000000000, + 0x48f3d53f00000000, 0xbd7b23b900000000, 0x756cbd7600000000, + 0x6c526efd00000000, 0xa445f03200000000, 0x1f28b93100000000, + 0xd73f27fe00000000, 0xce01f47500000000, 0x06166aba00000000, + 0xb8da667300000000, 0x70cdf8bc00000000, 0x69f32b3700000000, + 0xa1e4b5f800000000, 0x1a89fcfb00000000, 0xd29e623400000000, + 0xcba0b1bf00000000, 0x03b72f7000000000, 0x60b72d6900000000, + 0xa8a0b3a600000000, 0xb19e602d00000000, 0x7989fee200000000, + 0xc2e4b7e100000000, 0x0af3292e00000000, 0x13cdfaa500000000, + 0xdbda646a00000000, 0x651668a300000000, 0xad01f66c00000000, + 0xb43f25e700000000, 0x7c28bb2800000000, 0xc745f22b00000000, + 0x0f526ce400000000, 0x166cbf6f00000000, 0xde7b21a000000000, + 0x2bf3d72600000000, 0xe3e449e900000000, 0xfada9a6200000000, + 0x32cd04ad00000000, 0x89a04dae00000000, 0x41b7d36100000000, + 0x588900ea00000000, 0x909e9e2500000000, 0x2e5292ec00000000, + 0xe6450c2300000000, 0xff7bdfa800000000, 0x376c416700000000, + 0x8c01086400000000, 0x441696ab00000000, 0x5d28452000000000, + 0x953fdbef00000000}, + {0x0000000000000000, 0x95d4709500000000, 0x6baf90f100000000, + 0xfe7be06400000000, 0x9758503800000000, 0x028c20ad00000000, + 0xfcf7c0c900000000, 
0x6923b05c00000000, 0x2eb1a07000000000, + 0xbb65d0e500000000, 0x451e308100000000, 0xd0ca401400000000, + 0xb9e9f04800000000, 0x2c3d80dd00000000, 0xd24660b900000000, + 0x4792102c00000000, 0x5c6241e100000000, 0xc9b6317400000000, + 0x37cdd11000000000, 0xa219a18500000000, 0xcb3a11d900000000, + 0x5eee614c00000000, 0xa095812800000000, 0x3541f1bd00000000, + 0x72d3e19100000000, 0xe707910400000000, 0x197c716000000000, + 0x8ca801f500000000, 0xe58bb1a900000000, 0x705fc13c00000000, + 0x8e24215800000000, 0x1bf051cd00000000, 0xf9c2f31900000000, + 0x6c16838c00000000, 0x926d63e800000000, 0x07b9137d00000000, + 0x6e9aa32100000000, 0xfb4ed3b400000000, 0x053533d000000000, + 0x90e1434500000000, 0xd773536900000000, 0x42a723fc00000000, + 0xbcdcc39800000000, 0x2908b30d00000000, 0x402b035100000000, + 0xd5ff73c400000000, 0x2b8493a000000000, 0xbe50e33500000000, + 0xa5a0b2f800000000, 0x3074c26d00000000, 0xce0f220900000000, + 0x5bdb529c00000000, 0x32f8e2c000000000, 0xa72c925500000000, + 0x5957723100000000, 0xcc8302a400000000, 0x8b11128800000000, + 0x1ec5621d00000000, 0xe0be827900000000, 0x756af2ec00000000, + 0x1c4942b000000000, 0x899d322500000000, 0x77e6d24100000000, + 0xe232a2d400000000, 0xf285e73300000000, 0x675197a600000000, + 0x992a77c200000000, 0x0cfe075700000000, 0x65ddb70b00000000, + 0xf009c79e00000000, 0x0e7227fa00000000, 0x9ba6576f00000000, + 0xdc34474300000000, 0x49e037d600000000, 0xb79bd7b200000000, + 0x224fa72700000000, 0x4b6c177b00000000, 0xdeb867ee00000000, + 0x20c3878a00000000, 0xb517f71f00000000, 0xaee7a6d200000000, + 0x3b33d64700000000, 0xc548362300000000, 0x509c46b600000000, + 0x39bff6ea00000000, 0xac6b867f00000000, 0x5210661b00000000, + 0xc7c4168e00000000, 0x805606a200000000, 0x1582763700000000, + 0xebf9965300000000, 0x7e2de6c600000000, 0x170e569a00000000, + 0x82da260f00000000, 0x7ca1c66b00000000, 0xe975b6fe00000000, + 0x0b47142a00000000, 0x9e9364bf00000000, 0x60e884db00000000, + 0xf53cf44e00000000, 0x9c1f441200000000, 0x09cb348700000000, + 0xf7b0d4e300000000, 0x6264a47600000000, 0x25f6b45a00000000, + 0xb022c4cf00000000, 0x4e5924ab00000000, 0xdb8d543e00000000, + 0xb2aee46200000000, 0x277a94f700000000, 0xd901749300000000, + 0x4cd5040600000000, 0x572555cb00000000, 0xc2f1255e00000000, + 0x3c8ac53a00000000, 0xa95eb5af00000000, 0xc07d05f300000000, + 0x55a9756600000000, 0xabd2950200000000, 0x3e06e59700000000, + 0x7994f5bb00000000, 0xec40852e00000000, 0x123b654a00000000, + 0x87ef15df00000000, 0xeecca58300000000, 0x7b18d51600000000, + 0x8563357200000000, 0x10b745e700000000, 0xe40bcf6700000000, + 0x71dfbff200000000, 0x8fa45f9600000000, 0x1a702f0300000000, + 0x73539f5f00000000, 0xe687efca00000000, 0x18fc0fae00000000, + 0x8d287f3b00000000, 0xcaba6f1700000000, 0x5f6e1f8200000000, + 0xa115ffe600000000, 0x34c18f7300000000, 0x5de23f2f00000000, + 0xc8364fba00000000, 0x364dafde00000000, 0xa399df4b00000000, + 0xb8698e8600000000, 0x2dbdfe1300000000, 0xd3c61e7700000000, + 0x46126ee200000000, 0x2f31debe00000000, 0xbae5ae2b00000000, + 0x449e4e4f00000000, 0xd14a3eda00000000, 0x96d82ef600000000, + 0x030c5e6300000000, 0xfd77be0700000000, 0x68a3ce9200000000, + 0x01807ece00000000, 0x94540e5b00000000, 0x6a2fee3f00000000, + 0xfffb9eaa00000000, 0x1dc93c7e00000000, 0x881d4ceb00000000, + 0x7666ac8f00000000, 0xe3b2dc1a00000000, 0x8a916c4600000000, + 0x1f451cd300000000, 0xe13efcb700000000, 0x74ea8c2200000000, + 0x33789c0e00000000, 0xa6acec9b00000000, 0x58d70cff00000000, + 0xcd037c6a00000000, 0xa420cc3600000000, 0x31f4bca300000000, + 0xcf8f5cc700000000, 0x5a5b2c5200000000, 0x41ab7d9f00000000, + 0xd47f0d0a00000000, 0x2a04ed6e00000000, 
0xbfd09dfb00000000, + 0xd6f32da700000000, 0x43275d3200000000, 0xbd5cbd5600000000, + 0x2888cdc300000000, 0x6f1addef00000000, 0xfacead7a00000000, + 0x04b54d1e00000000, 0x91613d8b00000000, 0xf8428dd700000000, + 0x6d96fd4200000000, 0x93ed1d2600000000, 0x06396db300000000, + 0x168e285400000000, 0x835a58c100000000, 0x7d21b8a500000000, + 0xe8f5c83000000000, 0x81d6786c00000000, 0x140208f900000000, + 0xea79e89d00000000, 0x7fad980800000000, 0x383f882400000000, + 0xadebf8b100000000, 0x539018d500000000, 0xc644684000000000, + 0xaf67d81c00000000, 0x3ab3a88900000000, 0xc4c848ed00000000, + 0x511c387800000000, 0x4aec69b500000000, 0xdf38192000000000, + 0x2143f94400000000, 0xb49789d100000000, 0xddb4398d00000000, + 0x4860491800000000, 0xb61ba97c00000000, 0x23cfd9e900000000, + 0x645dc9c500000000, 0xf189b95000000000, 0x0ff2593400000000, + 0x9a2629a100000000, 0xf30599fd00000000, 0x66d1e96800000000, + 0x98aa090c00000000, 0x0d7e799900000000, 0xef4cdb4d00000000, + 0x7a98abd800000000, 0x84e34bbc00000000, 0x11373b2900000000, + 0x78148b7500000000, 0xedc0fbe000000000, 0x13bb1b8400000000, + 0x866f6b1100000000, 0xc1fd7b3d00000000, 0x54290ba800000000, + 0xaa52ebcc00000000, 0x3f869b5900000000, 0x56a52b0500000000, + 0xc3715b9000000000, 0x3d0abbf400000000, 0xa8decb6100000000, + 0xb32e9aac00000000, 0x26faea3900000000, 0xd8810a5d00000000, + 0x4d557ac800000000, 0x2476ca9400000000, 0xb1a2ba0100000000, + 0x4fd95a6500000000, 0xda0d2af000000000, 0x9d9f3adc00000000, + 0x084b4a4900000000, 0xf630aa2d00000000, 0x63e4dab800000000, + 0x0ac76ae400000000, 0x9f131a7100000000, 0x6168fa1500000000, + 0xf4bc8a8000000000}, + {0x0000000000000000, 0x1f17f08000000000, 0x7f2891da00000000, + 0x603f615a00000000, 0xbf56536e00000000, 0xa041a3ee00000000, + 0xc07ec2b400000000, 0xdf69323400000000, 0x7eada6dc00000000, + 0x61ba565c00000000, 0x0185370600000000, 0x1e92c78600000000, + 0xc1fbf5b200000000, 0xdeec053200000000, 0xbed3646800000000, + 0xa1c494e800000000, 0xbd5c3c6200000000, 0xa24bcce200000000, + 0xc274adb800000000, 0xdd635d3800000000, 0x020a6f0c00000000, + 0x1d1d9f8c00000000, 0x7d22fed600000000, 0x62350e5600000000, + 0xc3f19abe00000000, 0xdce66a3e00000000, 0xbcd90b6400000000, + 0xa3cefbe400000000, 0x7ca7c9d000000000, 0x63b0395000000000, + 0x038f580a00000000, 0x1c98a88a00000000, 0x7ab978c400000000, + 0x65ae884400000000, 0x0591e91e00000000, 0x1a86199e00000000, + 0xc5ef2baa00000000, 0xdaf8db2a00000000, 0xbac7ba7000000000, + 0xa5d04af000000000, 0x0414de1800000000, 0x1b032e9800000000, + 0x7b3c4fc200000000, 0x642bbf4200000000, 0xbb428d7600000000, + 0xa4557df600000000, 0xc46a1cac00000000, 0xdb7dec2c00000000, + 0xc7e544a600000000, 0xd8f2b42600000000, 0xb8cdd57c00000000, + 0xa7da25fc00000000, 0x78b317c800000000, 0x67a4e74800000000, + 0x079b861200000000, 0x188c769200000000, 0xb948e27a00000000, + 0xa65f12fa00000000, 0xc66073a000000000, 0xd977832000000000, + 0x061eb11400000000, 0x1909419400000000, 0x793620ce00000000, + 0x6621d04e00000000, 0xb574805300000000, 0xaa6370d300000000, + 0xca5c118900000000, 0xd54be10900000000, 0x0a22d33d00000000, + 0x153523bd00000000, 0x750a42e700000000, 0x6a1db26700000000, + 0xcbd9268f00000000, 0xd4ced60f00000000, 0xb4f1b75500000000, + 0xabe647d500000000, 0x748f75e100000000, 0x6b98856100000000, + 0x0ba7e43b00000000, 0x14b014bb00000000, 0x0828bc3100000000, + 0x173f4cb100000000, 0x77002deb00000000, 0x6817dd6b00000000, + 0xb77eef5f00000000, 0xa8691fdf00000000, 0xc8567e8500000000, + 0xd7418e0500000000, 0x76851aed00000000, 0x6992ea6d00000000, + 0x09ad8b3700000000, 0x16ba7bb700000000, 0xc9d3498300000000, + 0xd6c4b90300000000, 
0xb6fbd85900000000, 0xa9ec28d900000000, + 0xcfcdf89700000000, 0xd0da081700000000, 0xb0e5694d00000000, + 0xaff299cd00000000, 0x709babf900000000, 0x6f8c5b7900000000, + 0x0fb33a2300000000, 0x10a4caa300000000, 0xb1605e4b00000000, + 0xae77aecb00000000, 0xce48cf9100000000, 0xd15f3f1100000000, + 0x0e360d2500000000, 0x1121fda500000000, 0x711e9cff00000000, + 0x6e096c7f00000000, 0x7291c4f500000000, 0x6d86347500000000, + 0x0db9552f00000000, 0x12aea5af00000000, 0xcdc7979b00000000, + 0xd2d0671b00000000, 0xb2ef064100000000, 0xadf8f6c100000000, + 0x0c3c622900000000, 0x132b92a900000000, 0x7314f3f300000000, + 0x6c03037300000000, 0xb36a314700000000, 0xac7dc1c700000000, + 0xcc42a09d00000000, 0xd355501d00000000, 0x6ae900a700000000, + 0x75fef02700000000, 0x15c1917d00000000, 0x0ad661fd00000000, + 0xd5bf53c900000000, 0xcaa8a34900000000, 0xaa97c21300000000, + 0xb580329300000000, 0x1444a67b00000000, 0x0b5356fb00000000, + 0x6b6c37a100000000, 0x747bc72100000000, 0xab12f51500000000, + 0xb405059500000000, 0xd43a64cf00000000, 0xcb2d944f00000000, + 0xd7b53cc500000000, 0xc8a2cc4500000000, 0xa89dad1f00000000, + 0xb78a5d9f00000000, 0x68e36fab00000000, 0x77f49f2b00000000, + 0x17cbfe7100000000, 0x08dc0ef100000000, 0xa9189a1900000000, + 0xb60f6a9900000000, 0xd6300bc300000000, 0xc927fb4300000000, + 0x164ec97700000000, 0x095939f700000000, 0x696658ad00000000, + 0x7671a82d00000000, 0x1050786300000000, 0x0f4788e300000000, + 0x6f78e9b900000000, 0x706f193900000000, 0xaf062b0d00000000, + 0xb011db8d00000000, 0xd02ebad700000000, 0xcf394a5700000000, + 0x6efddebf00000000, 0x71ea2e3f00000000, 0x11d54f6500000000, + 0x0ec2bfe500000000, 0xd1ab8dd100000000, 0xcebc7d5100000000, + 0xae831c0b00000000, 0xb194ec8b00000000, 0xad0c440100000000, + 0xb21bb48100000000, 0xd224d5db00000000, 0xcd33255b00000000, + 0x125a176f00000000, 0x0d4de7ef00000000, 0x6d7286b500000000, + 0x7265763500000000, 0xd3a1e2dd00000000, 0xccb6125d00000000, + 0xac89730700000000, 0xb39e838700000000, 0x6cf7b1b300000000, + 0x73e0413300000000, 0x13df206900000000, 0x0cc8d0e900000000, + 0xdf9d80f400000000, 0xc08a707400000000, 0xa0b5112e00000000, + 0xbfa2e1ae00000000, 0x60cbd39a00000000, 0x7fdc231a00000000, + 0x1fe3424000000000, 0x00f4b2c000000000, 0xa130262800000000, + 0xbe27d6a800000000, 0xde18b7f200000000, 0xc10f477200000000, + 0x1e66754600000000, 0x017185c600000000, 0x614ee49c00000000, + 0x7e59141c00000000, 0x62c1bc9600000000, 0x7dd64c1600000000, + 0x1de92d4c00000000, 0x02feddcc00000000, 0xdd97eff800000000, + 0xc2801f7800000000, 0xa2bf7e2200000000, 0xbda88ea200000000, + 0x1c6c1a4a00000000, 0x037beaca00000000, 0x63448b9000000000, + 0x7c537b1000000000, 0xa33a492400000000, 0xbc2db9a400000000, + 0xdc12d8fe00000000, 0xc305287e00000000, 0xa524f83000000000, + 0xba3308b000000000, 0xda0c69ea00000000, 0xc51b996a00000000, + 0x1a72ab5e00000000, 0x05655bde00000000, 0x655a3a8400000000, + 0x7a4dca0400000000, 0xdb895eec00000000, 0xc49eae6c00000000, + 0xa4a1cf3600000000, 0xbbb63fb600000000, 0x64df0d8200000000, + 0x7bc8fd0200000000, 0x1bf79c5800000000, 0x04e06cd800000000, + 0x1878c45200000000, 0x076f34d200000000, 0x6750558800000000, + 0x7847a50800000000, 0xa72e973c00000000, 0xb83967bc00000000, + 0xd80606e600000000, 0xc711f66600000000, 0x66d5628e00000000, + 0x79c2920e00000000, 0x19fdf35400000000, 0x06ea03d400000000, + 0xd98331e000000000, 0xc694c16000000000, 0xa6aba03a00000000, + 0xb9bc50ba00000000}, + {0x0000000000000000, 0xe2fd888d00000000, 0x85fd60c000000000, + 0x6700e84d00000000, 0x4bfdb05b00000000, 0xa90038d600000000, + 0xce00d09b00000000, 0x2cfd581600000000, 0x96fa61b700000000, + 
0x7407e93a00000000, 0x1307017700000000, 0xf1fa89fa00000000, + 0xdd07d1ec00000000, 0x3ffa596100000000, 0x58fab12c00000000, + 0xba0739a100000000, 0x6df3b2b500000000, 0x8f0e3a3800000000, + 0xe80ed27500000000, 0x0af35af800000000, 0x260e02ee00000000, + 0xc4f38a6300000000, 0xa3f3622e00000000, 0x410eeaa300000000, + 0xfb09d30200000000, 0x19f45b8f00000000, 0x7ef4b3c200000000, + 0x9c093b4f00000000, 0xb0f4635900000000, 0x5209ebd400000000, + 0x3509039900000000, 0xd7f48b1400000000, 0x9be014b000000000, + 0x791d9c3d00000000, 0x1e1d747000000000, 0xfce0fcfd00000000, + 0xd01da4eb00000000, 0x32e02c6600000000, 0x55e0c42b00000000, + 0xb71d4ca600000000, 0x0d1a750700000000, 0xefe7fd8a00000000, + 0x88e715c700000000, 0x6a1a9d4a00000000, 0x46e7c55c00000000, + 0xa41a4dd100000000, 0xc31aa59c00000000, 0x21e72d1100000000, + 0xf613a60500000000, 0x14ee2e8800000000, 0x73eec6c500000000, + 0x91134e4800000000, 0xbdee165e00000000, 0x5f139ed300000000, + 0x3813769e00000000, 0xdaeefe1300000000, 0x60e9c7b200000000, + 0x82144f3f00000000, 0xe514a77200000000, 0x07e92fff00000000, + 0x2b1477e900000000, 0xc9e9ff6400000000, 0xaee9172900000000, + 0x4c149fa400000000, 0x77c758bb00000000, 0x953ad03600000000, + 0xf23a387b00000000, 0x10c7b0f600000000, 0x3c3ae8e000000000, + 0xdec7606d00000000, 0xb9c7882000000000, 0x5b3a00ad00000000, + 0xe13d390c00000000, 0x03c0b18100000000, 0x64c059cc00000000, + 0x863dd14100000000, 0xaac0895700000000, 0x483d01da00000000, + 0x2f3de99700000000, 0xcdc0611a00000000, 0x1a34ea0e00000000, + 0xf8c9628300000000, 0x9fc98ace00000000, 0x7d34024300000000, + 0x51c95a5500000000, 0xb334d2d800000000, 0xd4343a9500000000, + 0x36c9b21800000000, 0x8cce8bb900000000, 0x6e33033400000000, + 0x0933eb7900000000, 0xebce63f400000000, 0xc7333be200000000, + 0x25ceb36f00000000, 0x42ce5b2200000000, 0xa033d3af00000000, + 0xec274c0b00000000, 0x0edac48600000000, 0x69da2ccb00000000, + 0x8b27a44600000000, 0xa7dafc5000000000, 0x452774dd00000000, + 0x22279c9000000000, 0xc0da141d00000000, 0x7add2dbc00000000, + 0x9820a53100000000, 0xff204d7c00000000, 0x1dddc5f100000000, + 0x31209de700000000, 0xd3dd156a00000000, 0xb4ddfd2700000000, + 0x562075aa00000000, 0x81d4febe00000000, 0x6329763300000000, + 0x04299e7e00000000, 0xe6d416f300000000, 0xca294ee500000000, + 0x28d4c66800000000, 0x4fd42e2500000000, 0xad29a6a800000000, + 0x172e9f0900000000, 0xf5d3178400000000, 0x92d3ffc900000000, + 0x702e774400000000, 0x5cd32f5200000000, 0xbe2ea7df00000000, + 0xd92e4f9200000000, 0x3bd3c71f00000000, 0xaf88c0ad00000000, + 0x4d75482000000000, 0x2a75a06d00000000, 0xc88828e000000000, + 0xe47570f600000000, 0x0688f87b00000000, 0x6188103600000000, + 0x837598bb00000000, 0x3972a11a00000000, 0xdb8f299700000000, + 0xbc8fc1da00000000, 0x5e72495700000000, 0x728f114100000000, + 0x907299cc00000000, 0xf772718100000000, 0x158ff90c00000000, + 0xc27b721800000000, 0x2086fa9500000000, 0x478612d800000000, + 0xa57b9a5500000000, 0x8986c24300000000, 0x6b7b4ace00000000, + 0x0c7ba28300000000, 0xee862a0e00000000, 0x548113af00000000, + 0xb67c9b2200000000, 0xd17c736f00000000, 0x3381fbe200000000, + 0x1f7ca3f400000000, 0xfd812b7900000000, 0x9a81c33400000000, + 0x787c4bb900000000, 0x3468d41d00000000, 0xd6955c9000000000, + 0xb195b4dd00000000, 0x53683c5000000000, 0x7f95644600000000, + 0x9d68eccb00000000, 0xfa68048600000000, 0x18958c0b00000000, + 0xa292b5aa00000000, 0x406f3d2700000000, 0x276fd56a00000000, + 0xc5925de700000000, 0xe96f05f100000000, 0x0b928d7c00000000, + 0x6c92653100000000, 0x8e6fedbc00000000, 0x599b66a800000000, + 0xbb66ee2500000000, 0xdc66066800000000, 0x3e9b8ee500000000, + 0x1266d6f300000000, 
0xf09b5e7e00000000, 0x979bb63300000000, + 0x75663ebe00000000, 0xcf61071f00000000, 0x2d9c8f9200000000, + 0x4a9c67df00000000, 0xa861ef5200000000, 0x849cb74400000000, + 0x66613fc900000000, 0x0161d78400000000, 0xe39c5f0900000000, + 0xd84f981600000000, 0x3ab2109b00000000, 0x5db2f8d600000000, + 0xbf4f705b00000000, 0x93b2284d00000000, 0x714fa0c000000000, + 0x164f488d00000000, 0xf4b2c00000000000, 0x4eb5f9a100000000, + 0xac48712c00000000, 0xcb48996100000000, 0x29b511ec00000000, + 0x054849fa00000000, 0xe7b5c17700000000, 0x80b5293a00000000, + 0x6248a1b700000000, 0xb5bc2aa300000000, 0x5741a22e00000000, + 0x30414a6300000000, 0xd2bcc2ee00000000, 0xfe419af800000000, + 0x1cbc127500000000, 0x7bbcfa3800000000, 0x994172b500000000, + 0x23464b1400000000, 0xc1bbc39900000000, 0xa6bb2bd400000000, + 0x4446a35900000000, 0x68bbfb4f00000000, 0x8a4673c200000000, + 0xed469b8f00000000, 0x0fbb130200000000, 0x43af8ca600000000, + 0xa152042b00000000, 0xc652ec6600000000, 0x24af64eb00000000, + 0x08523cfd00000000, 0xeaafb47000000000, 0x8daf5c3d00000000, + 0x6f52d4b000000000, 0xd555ed1100000000, 0x37a8659c00000000, + 0x50a88dd100000000, 0xb255055c00000000, 0x9ea85d4a00000000, + 0x7c55d5c700000000, 0x1b553d8a00000000, 0xf9a8b50700000000, + 0x2e5c3e1300000000, 0xcca1b69e00000000, 0xaba15ed300000000, + 0x495cd65e00000000, 0x65a18e4800000000, 0x875c06c500000000, + 0xe05cee8800000000, 0x02a1660500000000, 0xb8a65fa400000000, + 0x5a5bd72900000000, 0x3d5b3f6400000000, 0xdfa6b7e900000000, + 0xf35befff00000000, 0x11a6677200000000, 0x76a68f3f00000000, + 0x945b07b200000000}, + {0x0000000000000000, 0xa90b894e00000000, 0x5217129d00000000, + 0xfb1c9bd300000000, 0xe52855e100000000, 0x4c23dcaf00000000, + 0xb73f477c00000000, 0x1e34ce3200000000, 0x8b57db1900000000, + 0x225c525700000000, 0xd940c98400000000, 0x704b40ca00000000, + 0x6e7f8ef800000000, 0xc77407b600000000, 0x3c689c6500000000, + 0x9563152b00000000, 0x16afb63300000000, 0xbfa43f7d00000000, + 0x44b8a4ae00000000, 0xedb32de000000000, 0xf387e3d200000000, + 0x5a8c6a9c00000000, 0xa190f14f00000000, 0x089b780100000000, + 0x9df86d2a00000000, 0x34f3e46400000000, 0xcfef7fb700000000, + 0x66e4f6f900000000, 0x78d038cb00000000, 0xd1dbb18500000000, + 0x2ac72a5600000000, 0x83cca31800000000, 0x2c5e6d6700000000, + 0x8555e42900000000, 0x7e497ffa00000000, 0xd742f6b400000000, + 0xc976388600000000, 0x607db1c800000000, 0x9b612a1b00000000, + 0x326aa35500000000, 0xa709b67e00000000, 0x0e023f3000000000, + 0xf51ea4e300000000, 0x5c152dad00000000, 0x4221e39f00000000, + 0xeb2a6ad100000000, 0x1036f10200000000, 0xb93d784c00000000, + 0x3af1db5400000000, 0x93fa521a00000000, 0x68e6c9c900000000, + 0xc1ed408700000000, 0xdfd98eb500000000, 0x76d207fb00000000, + 0x8dce9c2800000000, 0x24c5156600000000, 0xb1a6004d00000000, + 0x18ad890300000000, 0xe3b112d000000000, 0x4aba9b9e00000000, + 0x548e55ac00000000, 0xfd85dce200000000, 0x0699473100000000, + 0xaf92ce7f00000000, 0x58bcdace00000000, 0xf1b7538000000000, + 0x0aabc85300000000, 0xa3a0411d00000000, 0xbd948f2f00000000, + 0x149f066100000000, 0xef839db200000000, 0x468814fc00000000, + 0xd3eb01d700000000, 0x7ae0889900000000, 0x81fc134a00000000, + 0x28f79a0400000000, 0x36c3543600000000, 0x9fc8dd7800000000, + 0x64d446ab00000000, 0xcddfcfe500000000, 0x4e136cfd00000000, + 0xe718e5b300000000, 0x1c047e6000000000, 0xb50ff72e00000000, + 0xab3b391c00000000, 0x0230b05200000000, 0xf92c2b8100000000, + 0x5027a2cf00000000, 0xc544b7e400000000, 0x6c4f3eaa00000000, + 0x9753a57900000000, 0x3e582c3700000000, 0x206ce20500000000, + 0x89676b4b00000000, 0x727bf09800000000, 0xdb7079d600000000, + 
0x74e2b7a900000000, 0xdde93ee700000000, 0x26f5a53400000000, + 0x8ffe2c7a00000000, 0x91cae24800000000, 0x38c16b0600000000, + 0xc3ddf0d500000000, 0x6ad6799b00000000, 0xffb56cb000000000, + 0x56bee5fe00000000, 0xada27e2d00000000, 0x04a9f76300000000, + 0x1a9d395100000000, 0xb396b01f00000000, 0x488a2bcc00000000, + 0xe181a28200000000, 0x624d019a00000000, 0xcb4688d400000000, + 0x305a130700000000, 0x99519a4900000000, 0x8765547b00000000, + 0x2e6edd3500000000, 0xd57246e600000000, 0x7c79cfa800000000, + 0xe91ada8300000000, 0x401153cd00000000, 0xbb0dc81e00000000, + 0x1206415000000000, 0x0c328f6200000000, 0xa539062c00000000, + 0x5e259dff00000000, 0xf72e14b100000000, 0xf17ec44600000000, + 0x58754d0800000000, 0xa369d6db00000000, 0x0a625f9500000000, + 0x145691a700000000, 0xbd5d18e900000000, 0x4641833a00000000, + 0xef4a0a7400000000, 0x7a291f5f00000000, 0xd322961100000000, + 0x283e0dc200000000, 0x8135848c00000000, 0x9f014abe00000000, + 0x360ac3f000000000, 0xcd16582300000000, 0x641dd16d00000000, + 0xe7d1727500000000, 0x4edafb3b00000000, 0xb5c660e800000000, + 0x1ccde9a600000000, 0x02f9279400000000, 0xabf2aeda00000000, + 0x50ee350900000000, 0xf9e5bc4700000000, 0x6c86a96c00000000, + 0xc58d202200000000, 0x3e91bbf100000000, 0x979a32bf00000000, + 0x89aefc8d00000000, 0x20a575c300000000, 0xdbb9ee1000000000, + 0x72b2675e00000000, 0xdd20a92100000000, 0x742b206f00000000, + 0x8f37bbbc00000000, 0x263c32f200000000, 0x3808fcc000000000, + 0x9103758e00000000, 0x6a1fee5d00000000, 0xc314671300000000, + 0x5677723800000000, 0xff7cfb7600000000, 0x046060a500000000, + 0xad6be9eb00000000, 0xb35f27d900000000, 0x1a54ae9700000000, + 0xe148354400000000, 0x4843bc0a00000000, 0xcb8f1f1200000000, + 0x6284965c00000000, 0x99980d8f00000000, 0x309384c100000000, + 0x2ea74af300000000, 0x87acc3bd00000000, 0x7cb0586e00000000, + 0xd5bbd12000000000, 0x40d8c40b00000000, 0xe9d34d4500000000, + 0x12cfd69600000000, 0xbbc45fd800000000, 0xa5f091ea00000000, + 0x0cfb18a400000000, 0xf7e7837700000000, 0x5eec0a3900000000, + 0xa9c21e8800000000, 0x00c997c600000000, 0xfbd50c1500000000, + 0x52de855b00000000, 0x4cea4b6900000000, 0xe5e1c22700000000, + 0x1efd59f400000000, 0xb7f6d0ba00000000, 0x2295c59100000000, + 0x8b9e4cdf00000000, 0x7082d70c00000000, 0xd9895e4200000000, + 0xc7bd907000000000, 0x6eb6193e00000000, 0x95aa82ed00000000, + 0x3ca10ba300000000, 0xbf6da8bb00000000, 0x166621f500000000, + 0xed7aba2600000000, 0x4471336800000000, 0x5a45fd5a00000000, + 0xf34e741400000000, 0x0852efc700000000, 0xa159668900000000, + 0x343a73a200000000, 0x9d31faec00000000, 0x662d613f00000000, + 0xcf26e87100000000, 0xd112264300000000, 0x7819af0d00000000, + 0x830534de00000000, 0x2a0ebd9000000000, 0x859c73ef00000000, + 0x2c97faa100000000, 0xd78b617200000000, 0x7e80e83c00000000, + 0x60b4260e00000000, 0xc9bfaf4000000000, 0x32a3349300000000, + 0x9ba8bddd00000000, 0x0ecba8f600000000, 0xa7c021b800000000, + 0x5cdcba6b00000000, 0xf5d7332500000000, 0xebe3fd1700000000, + 0x42e8745900000000, 0xb9f4ef8a00000000, 0x10ff66c400000000, + 0x9333c5dc00000000, 0x3a384c9200000000, 0xc124d74100000000, + 0x682f5e0f00000000, 0x761b903d00000000, 0xdf10197300000000, + 0x240c82a000000000, 0x8d070bee00000000, 0x18641ec500000000, + 0xb16f978b00000000, 0x4a730c5800000000, 0xe378851600000000, + 0xfd4c4b2400000000, 0x5447c26a00000000, 0xaf5b59b900000000, + 0x0650d0f700000000}, + {0x0000000000000000, 0x479244af00000000, 0xcf22f88500000000, + 0x88b0bc2a00000000, 0xdf4381d000000000, 0x98d1c57f00000000, + 0x1061795500000000, 0x57f33dfa00000000, 0xff81737a00000000, + 0xb81337d500000000, 0x30a38bff00000000, 
0x7731cf5000000000, + 0x20c2f2aa00000000, 0x6750b60500000000, 0xefe00a2f00000000, + 0xa8724e8000000000, 0xfe03e7f400000000, 0xb991a35b00000000, + 0x31211f7100000000, 0x76b35bde00000000, 0x2140662400000000, + 0x66d2228b00000000, 0xee629ea100000000, 0xa9f0da0e00000000, + 0x0182948e00000000, 0x4610d02100000000, 0xcea06c0b00000000, + 0x893228a400000000, 0xdec1155e00000000, 0x995351f100000000, + 0x11e3eddb00000000, 0x5671a97400000000, 0xbd01bf3200000000, + 0xfa93fb9d00000000, 0x722347b700000000, 0x35b1031800000000, + 0x62423ee200000000, 0x25d07a4d00000000, 0xad60c66700000000, + 0xeaf282c800000000, 0x4280cc4800000000, 0x051288e700000000, + 0x8da234cd00000000, 0xca30706200000000, 0x9dc34d9800000000, + 0xda51093700000000, 0x52e1b51d00000000, 0x1573f1b200000000, + 0x430258c600000000, 0x04901c6900000000, 0x8c20a04300000000, + 0xcbb2e4ec00000000, 0x9c41d91600000000, 0xdbd39db900000000, + 0x5363219300000000, 0x14f1653c00000000, 0xbc832bbc00000000, + 0xfb116f1300000000, 0x73a1d33900000000, 0x3433979600000000, + 0x63c0aa6c00000000, 0x2452eec300000000, 0xace252e900000000, + 0xeb70164600000000, 0x7a037e6500000000, 0x3d913aca00000000, + 0xb52186e000000000, 0xf2b3c24f00000000, 0xa540ffb500000000, + 0xe2d2bb1a00000000, 0x6a62073000000000, 0x2df0439f00000000, + 0x85820d1f00000000, 0xc21049b000000000, 0x4aa0f59a00000000, + 0x0d32b13500000000, 0x5ac18ccf00000000, 0x1d53c86000000000, + 0x95e3744a00000000, 0xd27130e500000000, 0x8400999100000000, + 0xc392dd3e00000000, 0x4b22611400000000, 0x0cb025bb00000000, + 0x5b43184100000000, 0x1cd15cee00000000, 0x9461e0c400000000, + 0xd3f3a46b00000000, 0x7b81eaeb00000000, 0x3c13ae4400000000, + 0xb4a3126e00000000, 0xf33156c100000000, 0xa4c26b3b00000000, + 0xe3502f9400000000, 0x6be093be00000000, 0x2c72d71100000000, + 0xc702c15700000000, 0x809085f800000000, 0x082039d200000000, + 0x4fb27d7d00000000, 0x1841408700000000, 0x5fd3042800000000, + 0xd763b80200000000, 0x90f1fcad00000000, 0x3883b22d00000000, + 0x7f11f68200000000, 0xf7a14aa800000000, 0xb0330e0700000000, + 0xe7c033fd00000000, 0xa052775200000000, 0x28e2cb7800000000, + 0x6f708fd700000000, 0x390126a300000000, 0x7e93620c00000000, + 0xf623de2600000000, 0xb1b19a8900000000, 0xe642a77300000000, + 0xa1d0e3dc00000000, 0x29605ff600000000, 0x6ef21b5900000000, + 0xc68055d900000000, 0x8112117600000000, 0x09a2ad5c00000000, + 0x4e30e9f300000000, 0x19c3d40900000000, 0x5e5190a600000000, + 0xd6e12c8c00000000, 0x9173682300000000, 0xf406fcca00000000, + 0xb394b86500000000, 0x3b24044f00000000, 0x7cb640e000000000, + 0x2b457d1a00000000, 0x6cd739b500000000, 0xe467859f00000000, + 0xa3f5c13000000000, 0x0b878fb000000000, 0x4c15cb1f00000000, + 0xc4a5773500000000, 0x8337339a00000000, 0xd4c40e6000000000, + 0x93564acf00000000, 0x1be6f6e500000000, 0x5c74b24a00000000, + 0x0a051b3e00000000, 0x4d975f9100000000, 0xc527e3bb00000000, + 0x82b5a71400000000, 0xd5469aee00000000, 0x92d4de4100000000, + 0x1a64626b00000000, 0x5df626c400000000, 0xf584684400000000, + 0xb2162ceb00000000, 0x3aa690c100000000, 0x7d34d46e00000000, + 0x2ac7e99400000000, 0x6d55ad3b00000000, 0xe5e5111100000000, + 0xa27755be00000000, 0x490743f800000000, 0x0e95075700000000, + 0x8625bb7d00000000, 0xc1b7ffd200000000, 0x9644c22800000000, + 0xd1d6868700000000, 0x59663aad00000000, 0x1ef47e0200000000, + 0xb686308200000000, 0xf114742d00000000, 0x79a4c80700000000, + 0x3e368ca800000000, 0x69c5b15200000000, 0x2e57f5fd00000000, + 0xa6e749d700000000, 0xe1750d7800000000, 0xb704a40c00000000, + 0xf096e0a300000000, 0x78265c8900000000, 0x3fb4182600000000, + 0x684725dc00000000, 0x2fd5617300000000, 0xa765dd5900000000, 
+ 0xe0f799f600000000, 0x4885d77600000000, 0x0f1793d900000000, + 0x87a72ff300000000, 0xc0356b5c00000000, 0x97c656a600000000, + 0xd054120900000000, 0x58e4ae2300000000, 0x1f76ea8c00000000, + 0x8e0582af00000000, 0xc997c60000000000, 0x41277a2a00000000, + 0x06b53e8500000000, 0x5146037f00000000, 0x16d447d000000000, + 0x9e64fbfa00000000, 0xd9f6bf5500000000, 0x7184f1d500000000, + 0x3616b57a00000000, 0xbea6095000000000, 0xf9344dff00000000, + 0xaec7700500000000, 0xe95534aa00000000, 0x61e5888000000000, + 0x2677cc2f00000000, 0x7006655b00000000, 0x379421f400000000, + 0xbf249dde00000000, 0xf8b6d97100000000, 0xaf45e48b00000000, + 0xe8d7a02400000000, 0x60671c0e00000000, 0x27f558a100000000, + 0x8f87162100000000, 0xc815528e00000000, 0x40a5eea400000000, + 0x0737aa0b00000000, 0x50c497f100000000, 0x1756d35e00000000, + 0x9fe66f7400000000, 0xd8742bdb00000000, 0x33043d9d00000000, + 0x7496793200000000, 0xfc26c51800000000, 0xbbb481b700000000, + 0xec47bc4d00000000, 0xabd5f8e200000000, 0x236544c800000000, + 0x64f7006700000000, 0xcc854ee700000000, 0x8b170a4800000000, + 0x03a7b66200000000, 0x4435f2cd00000000, 0x13c6cf3700000000, + 0x54548b9800000000, 0xdce437b200000000, 0x9b76731d00000000, + 0xcd07da6900000000, 0x8a959ec600000000, 0x022522ec00000000, + 0x45b7664300000000, 0x12445bb900000000, 0x55d61f1600000000, + 0xdd66a33c00000000, 0x9af4e79300000000, 0x3286a91300000000, + 0x7514edbc00000000, 0xfda4519600000000, 0xba36153900000000, + 0xedc528c300000000, 0xaa576c6c00000000, 0x22e7d04600000000, + 0x657594e900000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x65673b46, 0xcace768c, 0xafa94dca, 0x4eedeb59, + 0x2b8ad01f, 0x84239dd5, 0xe144a693, 0x9ddbd6b2, 0xf8bcedf4, + 0x5715a03e, 0x32729b78, 0xd3363deb, 0xb65106ad, 0x19f84b67, + 0x7c9f7021, 0xe0c6ab25, 0x85a19063, 0x2a08dda9, 0x4f6fe6ef, + 0xae2b407c, 0xcb4c7b3a, 0x64e536f0, 0x01820db6, 0x7d1d7d97, + 0x187a46d1, 0xb7d30b1b, 0xd2b4305d, 0x33f096ce, 0x5697ad88, + 0xf93ee042, 0x9c59db04, 0x1afc500b, 0x7f9b6b4d, 0xd0322687, + 0xb5551dc1, 0x5411bb52, 0x31768014, 0x9edfcdde, 0xfbb8f698, + 0x872786b9, 0xe240bdff, 0x4de9f035, 0x288ecb73, 0xc9ca6de0, + 0xacad56a6, 0x03041b6c, 0x6663202a, 0xfa3afb2e, 0x9f5dc068, + 0x30f48da2, 0x5593b6e4, 0xb4d71077, 0xd1b02b31, 0x7e1966fb, + 0x1b7e5dbd, 0x67e12d9c, 0x028616da, 0xad2f5b10, 0xc8486056, + 0x290cc6c5, 0x4c6bfd83, 0xe3c2b049, 0x86a58b0f, 0x35f8a016, + 0x509f9b50, 0xff36d69a, 0x9a51eddc, 0x7b154b4f, 0x1e727009, + 0xb1db3dc3, 0xd4bc0685, 0xa82376a4, 0xcd444de2, 0x62ed0028, + 0x078a3b6e, 0xe6ce9dfd, 0x83a9a6bb, 0x2c00eb71, 0x4967d037, + 0xd53e0b33, 0xb0593075, 0x1ff07dbf, 0x7a9746f9, 0x9bd3e06a, + 0xfeb4db2c, 0x511d96e6, 0x347aada0, 0x48e5dd81, 0x2d82e6c7, + 0x822bab0d, 0xe74c904b, 0x060836d8, 0x636f0d9e, 0xccc64054, + 0xa9a17b12, 0x2f04f01d, 0x4a63cb5b, 0xe5ca8691, 0x80adbdd7, + 0x61e91b44, 0x048e2002, 0xab276dc8, 0xce40568e, 0xb2df26af, + 0xd7b81de9, 0x78115023, 0x1d766b65, 0xfc32cdf6, 0x9955f6b0, + 0x36fcbb7a, 0x539b803c, 0xcfc25b38, 0xaaa5607e, 0x050c2db4, + 0x606b16f2, 0x812fb061, 0xe4488b27, 0x4be1c6ed, 0x2e86fdab, + 0x52198d8a, 0x377eb6cc, 0x98d7fb06, 0xfdb0c040, 0x1cf466d3, + 0x79935d95, 0xd63a105f, 0xb35d2b19, 0x6bf1402c, 0x0e967b6a, + 0xa13f36a0, 0xc4580de6, 0x251cab75, 0x407b9033, 0xefd2ddf9, + 0x8ab5e6bf, 0xf62a969e, 0x934dadd8, 0x3ce4e012, 0x5983db54, + 0xb8c77dc7, 0xdda04681, 0x72090b4b, 0x176e300d, 0x8b37eb09, + 0xee50d04f, 0x41f99d85, 0x249ea6c3, 0xc5da0050, 0xa0bd3b16, + 0x0f1476dc, 0x6a734d9a, 0x16ec3dbb, 0x738b06fd, 0xdc224b37, + 0xb9457071, 0x5801d6e2, 0x3d66eda4, 
0x92cfa06e, 0xf7a89b28, + 0x710d1027, 0x146a2b61, 0xbbc366ab, 0xdea45ded, 0x3fe0fb7e, + 0x5a87c038, 0xf52e8df2, 0x9049b6b4, 0xecd6c695, 0x89b1fdd3, + 0x2618b019, 0x437f8b5f, 0xa23b2dcc, 0xc75c168a, 0x68f55b40, + 0x0d926006, 0x91cbbb02, 0xf4ac8044, 0x5b05cd8e, 0x3e62f6c8, + 0xdf26505b, 0xba416b1d, 0x15e826d7, 0x708f1d91, 0x0c106db0, + 0x697756f6, 0xc6de1b3c, 0xa3b9207a, 0x42fd86e9, 0x279abdaf, + 0x8833f065, 0xed54cb23, 0x5e09e03a, 0x3b6edb7c, 0x94c796b6, + 0xf1a0adf0, 0x10e40b63, 0x75833025, 0xda2a7def, 0xbf4d46a9, + 0xc3d23688, 0xa6b50dce, 0x091c4004, 0x6c7b7b42, 0x8d3fddd1, + 0xe858e697, 0x47f1ab5d, 0x2296901b, 0xbecf4b1f, 0xdba87059, + 0x74013d93, 0x116606d5, 0xf022a046, 0x95459b00, 0x3aecd6ca, + 0x5f8bed8c, 0x23149dad, 0x4673a6eb, 0xe9daeb21, 0x8cbdd067, + 0x6df976f4, 0x089e4db2, 0xa7370078, 0xc2503b3e, 0x44f5b031, + 0x21928b77, 0x8e3bc6bd, 0xeb5cfdfb, 0x0a185b68, 0x6f7f602e, + 0xc0d62de4, 0xa5b116a2, 0xd92e6683, 0xbc495dc5, 0x13e0100f, + 0x76872b49, 0x97c38dda, 0xf2a4b69c, 0x5d0dfb56, 0x386ac010, + 0xa4331b14, 0xc1542052, 0x6efd6d98, 0x0b9a56de, 0xeadef04d, + 0x8fb9cb0b, 0x201086c1, 0x4577bd87, 0x39e8cda6, 0x5c8ff6e0, + 0xf326bb2a, 0x9641806c, 0x770526ff, 0x12621db9, 0xbdcb5073, + 0xd8ac6b35}, + {0x00000000, 0xd7e28058, 0x74b406f1, 0xa35686a9, 0xe9680de2, + 0x3e8a8dba, 0x9ddc0b13, 0x4a3e8b4b, 0x09a11d85, 0xde439ddd, + 0x7d151b74, 0xaaf79b2c, 0xe0c91067, 0x372b903f, 0x947d1696, + 0x439f96ce, 0x13423b0a, 0xc4a0bb52, 0x67f63dfb, 0xb014bda3, + 0xfa2a36e8, 0x2dc8b6b0, 0x8e9e3019, 0x597cb041, 0x1ae3268f, + 0xcd01a6d7, 0x6e57207e, 0xb9b5a026, 0xf38b2b6d, 0x2469ab35, + 0x873f2d9c, 0x50ddadc4, 0x26847614, 0xf166f64c, 0x523070e5, + 0x85d2f0bd, 0xcfec7bf6, 0x180efbae, 0xbb587d07, 0x6cbafd5f, + 0x2f256b91, 0xf8c7ebc9, 0x5b916d60, 0x8c73ed38, 0xc64d6673, + 0x11afe62b, 0xb2f96082, 0x651be0da, 0x35c64d1e, 0xe224cd46, + 0x41724bef, 0x9690cbb7, 0xdcae40fc, 0x0b4cc0a4, 0xa81a460d, + 0x7ff8c655, 0x3c67509b, 0xeb85d0c3, 0x48d3566a, 0x9f31d632, + 0xd50f5d79, 0x02eddd21, 0xa1bb5b88, 0x7659dbd0, 0x4d08ec28, + 0x9aea6c70, 0x39bcead9, 0xee5e6a81, 0xa460e1ca, 0x73826192, + 0xd0d4e73b, 0x07366763, 0x44a9f1ad, 0x934b71f5, 0x301df75c, + 0xe7ff7704, 0xadc1fc4f, 0x7a237c17, 0xd975fabe, 0x0e977ae6, + 0x5e4ad722, 0x89a8577a, 0x2afed1d3, 0xfd1c518b, 0xb722dac0, + 0x60c05a98, 0xc396dc31, 0x14745c69, 0x57ebcaa7, 0x80094aff, + 0x235fcc56, 0xf4bd4c0e, 0xbe83c745, 0x6961471d, 0xca37c1b4, + 0x1dd541ec, 0x6b8c9a3c, 0xbc6e1a64, 0x1f389ccd, 0xc8da1c95, + 0x82e497de, 0x55061786, 0xf650912f, 0x21b21177, 0x622d87b9, + 0xb5cf07e1, 0x16998148, 0xc17b0110, 0x8b458a5b, 0x5ca70a03, + 0xfff18caa, 0x28130cf2, 0x78cea136, 0xaf2c216e, 0x0c7aa7c7, + 0xdb98279f, 0x91a6acd4, 0x46442c8c, 0xe512aa25, 0x32f02a7d, + 0x716fbcb3, 0xa68d3ceb, 0x05dbba42, 0xd2393a1a, 0x9807b151, + 0x4fe53109, 0xecb3b7a0, 0x3b5137f8, 0x9a11d850, 0x4df35808, + 0xeea5dea1, 0x39475ef9, 0x7379d5b2, 0xa49b55ea, 0x07cdd343, + 0xd02f531b, 0x93b0c5d5, 0x4452458d, 0xe704c324, 0x30e6437c, + 0x7ad8c837, 0xad3a486f, 0x0e6ccec6, 0xd98e4e9e, 0x8953e35a, + 0x5eb16302, 0xfde7e5ab, 0x2a0565f3, 0x603beeb8, 0xb7d96ee0, + 0x148fe849, 0xc36d6811, 0x80f2fedf, 0x57107e87, 0xf446f82e, + 0x23a47876, 0x699af33d, 0xbe787365, 0x1d2ef5cc, 0xcacc7594, + 0xbc95ae44, 0x6b772e1c, 0xc821a8b5, 0x1fc328ed, 0x55fda3a6, + 0x821f23fe, 0x2149a557, 0xf6ab250f, 0xb534b3c1, 0x62d63399, + 0xc180b530, 0x16623568, 0x5c5cbe23, 0x8bbe3e7b, 0x28e8b8d2, + 0xff0a388a, 0xafd7954e, 0x78351516, 0xdb6393bf, 0x0c8113e7, + 0x46bf98ac, 0x915d18f4, 0x320b9e5d, 0xe5e91e05, 0xa67688cb, + 0x71940893, 0xd2c28e3a, 0x05200e62, 
0x4f1e8529, 0x98fc0571, + 0x3baa83d8, 0xec480380, 0xd7193478, 0x00fbb420, 0xa3ad3289, + 0x744fb2d1, 0x3e71399a, 0xe993b9c2, 0x4ac53f6b, 0x9d27bf33, + 0xdeb829fd, 0x095aa9a5, 0xaa0c2f0c, 0x7deeaf54, 0x37d0241f, + 0xe032a447, 0x436422ee, 0x9486a2b6, 0xc45b0f72, 0x13b98f2a, + 0xb0ef0983, 0x670d89db, 0x2d330290, 0xfad182c8, 0x59870461, + 0x8e658439, 0xcdfa12f7, 0x1a1892af, 0xb94e1406, 0x6eac945e, + 0x24921f15, 0xf3709f4d, 0x502619e4, 0x87c499bc, 0xf19d426c, + 0x267fc234, 0x8529449d, 0x52cbc4c5, 0x18f54f8e, 0xcf17cfd6, + 0x6c41497f, 0xbba3c927, 0xf83c5fe9, 0x2fdedfb1, 0x8c885918, + 0x5b6ad940, 0x1154520b, 0xc6b6d253, 0x65e054fa, 0xb202d4a2, + 0xe2df7966, 0x353df93e, 0x966b7f97, 0x4189ffcf, 0x0bb77484, + 0xdc55f4dc, 0x7f037275, 0xa8e1f22d, 0xeb7e64e3, 0x3c9ce4bb, + 0x9fca6212, 0x4828e24a, 0x02166901, 0xd5f4e959, 0x76a26ff0, + 0xa140efa8}, + {0x00000000, 0xef52b6e1, 0x05d46b83, 0xea86dd62, 0x0ba8d706, + 0xe4fa61e7, 0x0e7cbc85, 0xe12e0a64, 0x1751ae0c, 0xf80318ed, + 0x1285c58f, 0xfdd7736e, 0x1cf9790a, 0xf3abcfeb, 0x192d1289, + 0xf67fa468, 0x2ea35c18, 0xc1f1eaf9, 0x2b77379b, 0xc425817a, + 0x250b8b1e, 0xca593dff, 0x20dfe09d, 0xcf8d567c, 0x39f2f214, + 0xd6a044f5, 0x3c269997, 0xd3742f76, 0x325a2512, 0xdd0893f3, + 0x378e4e91, 0xd8dcf870, 0x5d46b830, 0xb2140ed1, 0x5892d3b3, + 0xb7c06552, 0x56ee6f36, 0xb9bcd9d7, 0x533a04b5, 0xbc68b254, + 0x4a17163c, 0xa545a0dd, 0x4fc37dbf, 0xa091cb5e, 0x41bfc13a, + 0xaeed77db, 0x446baab9, 0xab391c58, 0x73e5e428, 0x9cb752c9, + 0x76318fab, 0x9963394a, 0x784d332e, 0x971f85cf, 0x7d9958ad, + 0x92cbee4c, 0x64b44a24, 0x8be6fcc5, 0x616021a7, 0x8e329746, + 0x6f1c9d22, 0x804e2bc3, 0x6ac8f6a1, 0x859a4040, 0xba8d7060, + 0x55dfc681, 0xbf591be3, 0x500bad02, 0xb125a766, 0x5e771187, + 0xb4f1cce5, 0x5ba37a04, 0xaddcde6c, 0x428e688d, 0xa808b5ef, + 0x475a030e, 0xa674096a, 0x4926bf8b, 0xa3a062e9, 0x4cf2d408, + 0x942e2c78, 0x7b7c9a99, 0x91fa47fb, 0x7ea8f11a, 0x9f86fb7e, + 0x70d44d9f, 0x9a5290fd, 0x7500261c, 0x837f8274, 0x6c2d3495, + 0x86abe9f7, 0x69f95f16, 0x88d75572, 0x6785e393, 0x8d033ef1, + 0x62518810, 0xe7cbc850, 0x08997eb1, 0xe21fa3d3, 0x0d4d1532, + 0xec631f56, 0x0331a9b7, 0xe9b774d5, 0x06e5c234, 0xf09a665c, + 0x1fc8d0bd, 0xf54e0ddf, 0x1a1cbb3e, 0xfb32b15a, 0x146007bb, + 0xfee6dad9, 0x11b46c38, 0xc9689448, 0x263a22a9, 0xccbcffcb, + 0x23ee492a, 0xc2c0434e, 0x2d92f5af, 0xc71428cd, 0x28469e2c, + 0xde393a44, 0x316b8ca5, 0xdbed51c7, 0x34bfe726, 0xd591ed42, + 0x3ac35ba3, 0xd04586c1, 0x3f173020, 0xae6be681, 0x41395060, + 0xabbf8d02, 0x44ed3be3, 0xa5c33187, 0x4a918766, 0xa0175a04, + 0x4f45ece5, 0xb93a488d, 0x5668fe6c, 0xbcee230e, 0x53bc95ef, + 0xb2929f8b, 0x5dc0296a, 0xb746f408, 0x581442e9, 0x80c8ba99, + 0x6f9a0c78, 0x851cd11a, 0x6a4e67fb, 0x8b606d9f, 0x6432db7e, + 0x8eb4061c, 0x61e6b0fd, 0x97991495, 0x78cba274, 0x924d7f16, + 0x7d1fc9f7, 0x9c31c393, 0x73637572, 0x99e5a810, 0x76b71ef1, + 0xf32d5eb1, 0x1c7fe850, 0xf6f93532, 0x19ab83d3, 0xf88589b7, + 0x17d73f56, 0xfd51e234, 0x120354d5, 0xe47cf0bd, 0x0b2e465c, + 0xe1a89b3e, 0x0efa2ddf, 0xefd427bb, 0x0086915a, 0xea004c38, + 0x0552fad9, 0xdd8e02a9, 0x32dcb448, 0xd85a692a, 0x3708dfcb, + 0xd626d5af, 0x3974634e, 0xd3f2be2c, 0x3ca008cd, 0xcadfaca5, + 0x258d1a44, 0xcf0bc726, 0x205971c7, 0xc1777ba3, 0x2e25cd42, + 0xc4a31020, 0x2bf1a6c1, 0x14e696e1, 0xfbb42000, 0x1132fd62, + 0xfe604b83, 0x1f4e41e7, 0xf01cf706, 0x1a9a2a64, 0xf5c89c85, + 0x03b738ed, 0xece58e0c, 0x0663536e, 0xe931e58f, 0x081fefeb, + 0xe74d590a, 0x0dcb8468, 0xe2993289, 0x3a45caf9, 0xd5177c18, + 0x3f91a17a, 0xd0c3179b, 0x31ed1dff, 0xdebfab1e, 0x3439767c, + 0xdb6bc09d, 0x2d1464f5, 0xc246d214, 
0x28c00f76, 0xc792b997, + 0x26bcb3f3, 0xc9ee0512, 0x2368d870, 0xcc3a6e91, 0x49a02ed1, + 0xa6f29830, 0x4c744552, 0xa326f3b3, 0x4208f9d7, 0xad5a4f36, + 0x47dc9254, 0xa88e24b5, 0x5ef180dd, 0xb1a3363c, 0x5b25eb5e, + 0xb4775dbf, 0x555957db, 0xba0be13a, 0x508d3c58, 0xbfdf8ab9, + 0x670372c9, 0x8851c428, 0x62d7194a, 0x8d85afab, 0x6caba5cf, + 0x83f9132e, 0x697fce4c, 0x862d78ad, 0x7052dcc5, 0x9f006a24, + 0x7586b746, 0x9ad401a7, 0x7bfa0bc3, 0x94a8bd22, 0x7e2e6040, + 0x917cd6a1}, + {0x00000000, 0x87a6cb43, 0xd43c90c7, 0x539a5b84, 0x730827cf, + 0xf4aeec8c, 0xa734b708, 0x20927c4b, 0xe6104f9e, 0x61b684dd, + 0x322cdf59, 0xb58a141a, 0x95186851, 0x12bea312, 0x4124f896, + 0xc68233d5, 0x1751997d, 0x90f7523e, 0xc36d09ba, 0x44cbc2f9, + 0x6459beb2, 0xe3ff75f1, 0xb0652e75, 0x37c3e536, 0xf141d6e3, + 0x76e71da0, 0x257d4624, 0xa2db8d67, 0x8249f12c, 0x05ef3a6f, + 0x567561eb, 0xd1d3aaa8, 0x2ea332fa, 0xa905f9b9, 0xfa9fa23d, + 0x7d39697e, 0x5dab1535, 0xda0dde76, 0x899785f2, 0x0e314eb1, + 0xc8b37d64, 0x4f15b627, 0x1c8feda3, 0x9b2926e0, 0xbbbb5aab, + 0x3c1d91e8, 0x6f87ca6c, 0xe821012f, 0x39f2ab87, 0xbe5460c4, + 0xedce3b40, 0x6a68f003, 0x4afa8c48, 0xcd5c470b, 0x9ec61c8f, + 0x1960d7cc, 0xdfe2e419, 0x58442f5a, 0x0bde74de, 0x8c78bf9d, + 0xaceac3d6, 0x2b4c0895, 0x78d65311, 0xff709852, 0x5d4665f4, + 0xdae0aeb7, 0x897af533, 0x0edc3e70, 0x2e4e423b, 0xa9e88978, + 0xfa72d2fc, 0x7dd419bf, 0xbb562a6a, 0x3cf0e129, 0x6f6abaad, + 0xe8cc71ee, 0xc85e0da5, 0x4ff8c6e6, 0x1c629d62, 0x9bc45621, + 0x4a17fc89, 0xcdb137ca, 0x9e2b6c4e, 0x198da70d, 0x391fdb46, + 0xbeb91005, 0xed234b81, 0x6a8580c2, 0xac07b317, 0x2ba17854, + 0x783b23d0, 0xff9de893, 0xdf0f94d8, 0x58a95f9b, 0x0b33041f, + 0x8c95cf5c, 0x73e5570e, 0xf4439c4d, 0xa7d9c7c9, 0x207f0c8a, + 0x00ed70c1, 0x874bbb82, 0xd4d1e006, 0x53772b45, 0x95f51890, + 0x1253d3d3, 0x41c98857, 0xc66f4314, 0xe6fd3f5f, 0x615bf41c, + 0x32c1af98, 0xb56764db, 0x64b4ce73, 0xe3120530, 0xb0885eb4, + 0x372e95f7, 0x17bce9bc, 0x901a22ff, 0xc380797b, 0x4426b238, + 0x82a481ed, 0x05024aae, 0x5698112a, 0xd13eda69, 0xf1aca622, + 0x760a6d61, 0x259036e5, 0xa236fda6, 0xba8ccbe8, 0x3d2a00ab, + 0x6eb05b2f, 0xe916906c, 0xc984ec27, 0x4e222764, 0x1db87ce0, + 0x9a1eb7a3, 0x5c9c8476, 0xdb3a4f35, 0x88a014b1, 0x0f06dff2, + 0x2f94a3b9, 0xa83268fa, 0xfba8337e, 0x7c0ef83d, 0xaddd5295, + 0x2a7b99d6, 0x79e1c252, 0xfe470911, 0xded5755a, 0x5973be19, + 0x0ae9e59d, 0x8d4f2ede, 0x4bcd1d0b, 0xcc6bd648, 0x9ff18dcc, + 0x1857468f, 0x38c53ac4, 0xbf63f187, 0xecf9aa03, 0x6b5f6140, + 0x942ff912, 0x13893251, 0x401369d5, 0xc7b5a296, 0xe727dedd, + 0x6081159e, 0x331b4e1a, 0xb4bd8559, 0x723fb68c, 0xf5997dcf, + 0xa603264b, 0x21a5ed08, 0x01379143, 0x86915a00, 0xd50b0184, + 0x52adcac7, 0x837e606f, 0x04d8ab2c, 0x5742f0a8, 0xd0e43beb, + 0xf07647a0, 0x77d08ce3, 0x244ad767, 0xa3ec1c24, 0x656e2ff1, + 0xe2c8e4b2, 0xb152bf36, 0x36f47475, 0x1666083e, 0x91c0c37d, + 0xc25a98f9, 0x45fc53ba, 0xe7caae1c, 0x606c655f, 0x33f63edb, + 0xb450f598, 0x94c289d3, 0x13644290, 0x40fe1914, 0xc758d257, + 0x01dae182, 0x867c2ac1, 0xd5e67145, 0x5240ba06, 0x72d2c64d, + 0xf5740d0e, 0xa6ee568a, 0x21489dc9, 0xf09b3761, 0x773dfc22, + 0x24a7a7a6, 0xa3016ce5, 0x839310ae, 0x0435dbed, 0x57af8069, + 0xd0094b2a, 0x168b78ff, 0x912db3bc, 0xc2b7e838, 0x4511237b, + 0x65835f30, 0xe2259473, 0xb1bfcff7, 0x361904b4, 0xc9699ce6, + 0x4ecf57a5, 0x1d550c21, 0x9af3c762, 0xba61bb29, 0x3dc7706a, + 0x6e5d2bee, 0xe9fbe0ad, 0x2f79d378, 0xa8df183b, 0xfb4543bf, + 0x7ce388fc, 0x5c71f4b7, 0xdbd73ff4, 0x884d6470, 0x0febaf33, + 0xde38059b, 0x599eced8, 0x0a04955c, 0x8da25e1f, 0xad302254, + 0x2a96e917, 0x790cb293, 0xfeaa79d0, 
0x38284a05, 0xbf8e8146, + 0xec14dac2, 0x6bb21181, 0x4b206dca, 0xcc86a689, 0x9f1cfd0d, + 0x18ba364e}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x43cba687, 0xc7903cd4, 0x845b9a53, 0xcf270873, + 0x8cecaef4, 0x08b734a7, 0x4b7c9220, 0x9e4f10e6, 0xdd84b661, + 0x59df2c32, 0x1a148ab5, 0x51681895, 0x12a3be12, 0x96f82441, + 0xd53382c6, 0x7d995117, 0x3e52f790, 0xba096dc3, 0xf9c2cb44, + 0xb2be5964, 0xf175ffe3, 0x752e65b0, 0x36e5c337, 0xe3d641f1, + 0xa01de776, 0x24467d25, 0x678ddba2, 0x2cf14982, 0x6f3aef05, + 0xeb617556, 0xa8aad3d1, 0xfa32a32e, 0xb9f905a9, 0x3da29ffa, + 0x7e69397d, 0x3515ab5d, 0x76de0dda, 0xf2859789, 0xb14e310e, + 0x647db3c8, 0x27b6154f, 0xa3ed8f1c, 0xe026299b, 0xab5abbbb, + 0xe8911d3c, 0x6cca876f, 0x2f0121e8, 0x87abf239, 0xc46054be, + 0x403bceed, 0x03f0686a, 0x488cfa4a, 0x0b475ccd, 0x8f1cc69e, + 0xccd76019, 0x19e4e2df, 0x5a2f4458, 0xde74de0b, 0x9dbf788c, + 0xd6c3eaac, 0x95084c2b, 0x1153d678, 0x529870ff, 0xf465465d, + 0xb7aee0da, 0x33f57a89, 0x703edc0e, 0x3b424e2e, 0x7889e8a9, + 0xfcd272fa, 0xbf19d47d, 0x6a2a56bb, 0x29e1f03c, 0xadba6a6f, + 0xee71cce8, 0xa50d5ec8, 0xe6c6f84f, 0x629d621c, 0x2156c49b, + 0x89fc174a, 0xca37b1cd, 0x4e6c2b9e, 0x0da78d19, 0x46db1f39, + 0x0510b9be, 0x814b23ed, 0xc280856a, 0x17b307ac, 0x5478a12b, + 0xd0233b78, 0x93e89dff, 0xd8940fdf, 0x9b5fa958, 0x1f04330b, + 0x5ccf958c, 0x0e57e573, 0x4d9c43f4, 0xc9c7d9a7, 0x8a0c7f20, + 0xc170ed00, 0x82bb4b87, 0x06e0d1d4, 0x452b7753, 0x9018f595, + 0xd3d35312, 0x5788c941, 0x14436fc6, 0x5f3ffde6, 0x1cf45b61, + 0x98afc132, 0xdb6467b5, 0x73ceb464, 0x300512e3, 0xb45e88b0, + 0xf7952e37, 0xbce9bc17, 0xff221a90, 0x7b7980c3, 0x38b22644, + 0xed81a482, 0xae4a0205, 0x2a119856, 0x69da3ed1, 0x22a6acf1, + 0x616d0a76, 0xe5369025, 0xa6fd36a2, 0xe8cb8cba, 0xab002a3d, + 0x2f5bb06e, 0x6c9016e9, 0x27ec84c9, 0x6427224e, 0xe07cb81d, + 0xa3b71e9a, 0x76849c5c, 0x354f3adb, 0xb114a088, 0xf2df060f, + 0xb9a3942f, 0xfa6832a8, 0x7e33a8fb, 0x3df80e7c, 0x9552ddad, + 0xd6997b2a, 0x52c2e179, 0x110947fe, 0x5a75d5de, 0x19be7359, + 0x9de5e90a, 0xde2e4f8d, 0x0b1dcd4b, 0x48d66bcc, 0xcc8df19f, + 0x8f465718, 0xc43ac538, 0x87f163bf, 0x03aaf9ec, 0x40615f6b, + 0x12f92f94, 0x51328913, 0xd5691340, 0x96a2b5c7, 0xddde27e7, + 0x9e158160, 0x1a4e1b33, 0x5985bdb4, 0x8cb63f72, 0xcf7d99f5, + 0x4b2603a6, 0x08eda521, 0x43913701, 0x005a9186, 0x84010bd5, + 0xc7caad52, 0x6f607e83, 0x2cabd804, 0xa8f04257, 0xeb3be4d0, + 0xa04776f0, 0xe38cd077, 0x67d74a24, 0x241ceca3, 0xf12f6e65, + 0xb2e4c8e2, 0x36bf52b1, 0x7574f436, 0x3e086616, 0x7dc3c091, + 0xf9985ac2, 0xba53fc45, 0x1caecae7, 0x5f656c60, 0xdb3ef633, + 0x98f550b4, 0xd389c294, 0x90426413, 0x1419fe40, 0x57d258c7, + 0x82e1da01, 0xc12a7c86, 0x4571e6d5, 0x06ba4052, 0x4dc6d272, + 0x0e0d74f5, 0x8a56eea6, 0xc99d4821, 0x61379bf0, 0x22fc3d77, + 0xa6a7a724, 0xe56c01a3, 0xae109383, 0xeddb3504, 0x6980af57, + 0x2a4b09d0, 0xff788b16, 0xbcb32d91, 0x38e8b7c2, 0x7b231145, + 0x305f8365, 0x739425e2, 0xf7cfbfb1, 0xb4041936, 0xe69c69c9, + 0xa557cf4e, 0x210c551d, 0x62c7f39a, 0x29bb61ba, 0x6a70c73d, + 0xee2b5d6e, 0xade0fbe9, 0x78d3792f, 0x3b18dfa8, 0xbf4345fb, + 0xfc88e37c, 0xb7f4715c, 0xf43fd7db, 0x70644d88, 0x33afeb0f, + 0x9b0538de, 0xd8ce9e59, 0x5c95040a, 0x1f5ea28d, 0x542230ad, + 0x17e9962a, 0x93b20c79, 0xd079aafe, 0x054a2838, 0x46818ebf, + 0xc2da14ec, 0x8111b26b, 0xca6d204b, 0x89a686cc, 0x0dfd1c9f, + 0x4e36ba18}, + {0x00000000, 0xe1b652ef, 0x836bd405, 0x62dd86ea, 0x06d7a80b, + 0xe761fae4, 0x85bc7c0e, 0x640a2ee1, 0x0cae5117, 0xed1803f8, + 0x8fc58512, 0x6e73d7fd, 0x0a79f91c, 0xebcfabf3, 0x89122d19, + 0x68a47ff6, 0x185ca32e, 
0xf9eaf1c1, 0x9b37772b, 0x7a8125c4, + 0x1e8b0b25, 0xff3d59ca, 0x9de0df20, 0x7c568dcf, 0x14f2f239, + 0xf544a0d6, 0x9799263c, 0x762f74d3, 0x12255a32, 0xf39308dd, + 0x914e8e37, 0x70f8dcd8, 0x30b8465d, 0xd10e14b2, 0xb3d39258, + 0x5265c0b7, 0x366fee56, 0xd7d9bcb9, 0xb5043a53, 0x54b268bc, + 0x3c16174a, 0xdda045a5, 0xbf7dc34f, 0x5ecb91a0, 0x3ac1bf41, + 0xdb77edae, 0xb9aa6b44, 0x581c39ab, 0x28e4e573, 0xc952b79c, + 0xab8f3176, 0x4a396399, 0x2e334d78, 0xcf851f97, 0xad58997d, + 0x4ceecb92, 0x244ab464, 0xc5fce68b, 0xa7216061, 0x4697328e, + 0x229d1c6f, 0xc32b4e80, 0xa1f6c86a, 0x40409a85, 0x60708dba, + 0x81c6df55, 0xe31b59bf, 0x02ad0b50, 0x66a725b1, 0x8711775e, + 0xe5ccf1b4, 0x047aa35b, 0x6cdedcad, 0x8d688e42, 0xefb508a8, + 0x0e035a47, 0x6a0974a6, 0x8bbf2649, 0xe962a0a3, 0x08d4f24c, + 0x782c2e94, 0x999a7c7b, 0xfb47fa91, 0x1af1a87e, 0x7efb869f, + 0x9f4dd470, 0xfd90529a, 0x1c260075, 0x74827f83, 0x95342d6c, + 0xf7e9ab86, 0x165ff969, 0x7255d788, 0x93e38567, 0xf13e038d, + 0x10885162, 0x50c8cbe7, 0xb17e9908, 0xd3a31fe2, 0x32154d0d, + 0x561f63ec, 0xb7a93103, 0xd574b7e9, 0x34c2e506, 0x5c669af0, + 0xbdd0c81f, 0xdf0d4ef5, 0x3ebb1c1a, 0x5ab132fb, 0xbb076014, + 0xd9dae6fe, 0x386cb411, 0x489468c9, 0xa9223a26, 0xcbffbccc, + 0x2a49ee23, 0x4e43c0c2, 0xaff5922d, 0xcd2814c7, 0x2c9e4628, + 0x443a39de, 0xa58c6b31, 0xc751eddb, 0x26e7bf34, 0x42ed91d5, + 0xa35bc33a, 0xc18645d0, 0x2030173f, 0x81e66bae, 0x60503941, + 0x028dbfab, 0xe33bed44, 0x8731c3a5, 0x6687914a, 0x045a17a0, + 0xe5ec454f, 0x8d483ab9, 0x6cfe6856, 0x0e23eebc, 0xef95bc53, + 0x8b9f92b2, 0x6a29c05d, 0x08f446b7, 0xe9421458, 0x99bac880, + 0x780c9a6f, 0x1ad11c85, 0xfb674e6a, 0x9f6d608b, 0x7edb3264, + 0x1c06b48e, 0xfdb0e661, 0x95149997, 0x74a2cb78, 0x167f4d92, + 0xf7c91f7d, 0x93c3319c, 0x72756373, 0x10a8e599, 0xf11eb776, + 0xb15e2df3, 0x50e87f1c, 0x3235f9f6, 0xd383ab19, 0xb78985f8, + 0x563fd717, 0x34e251fd, 0xd5540312, 0xbdf07ce4, 0x5c462e0b, + 0x3e9ba8e1, 0xdf2dfa0e, 0xbb27d4ef, 0x5a918600, 0x384c00ea, + 0xd9fa5205, 0xa9028edd, 0x48b4dc32, 0x2a695ad8, 0xcbdf0837, + 0xafd526d6, 0x4e637439, 0x2cbef2d3, 0xcd08a03c, 0xa5acdfca, + 0x441a8d25, 0x26c70bcf, 0xc7715920, 0xa37b77c1, 0x42cd252e, + 0x2010a3c4, 0xc1a6f12b, 0xe196e614, 0x0020b4fb, 0x62fd3211, + 0x834b60fe, 0xe7414e1f, 0x06f71cf0, 0x642a9a1a, 0x859cc8f5, + 0xed38b703, 0x0c8ee5ec, 0x6e536306, 0x8fe531e9, 0xebef1f08, + 0x0a594de7, 0x6884cb0d, 0x893299e2, 0xf9ca453a, 0x187c17d5, + 0x7aa1913f, 0x9b17c3d0, 0xff1ded31, 0x1eabbfde, 0x7c763934, + 0x9dc06bdb, 0xf564142d, 0x14d246c2, 0x760fc028, 0x97b992c7, + 0xf3b3bc26, 0x1205eec9, 0x70d86823, 0x916e3acc, 0xd12ea049, + 0x3098f2a6, 0x5245744c, 0xb3f326a3, 0xd7f90842, 0x364f5aad, + 0x5492dc47, 0xb5248ea8, 0xdd80f15e, 0x3c36a3b1, 0x5eeb255b, + 0xbf5d77b4, 0xdb575955, 0x3ae10bba, 0x583c8d50, 0xb98adfbf, + 0xc9720367, 0x28c45188, 0x4a19d762, 0xabaf858d, 0xcfa5ab6c, + 0x2e13f983, 0x4cce7f69, 0xad782d86, 0xc5dc5270, 0x246a009f, + 0x46b78675, 0xa701d49a, 0xc30bfa7b, 0x22bda894, 0x40602e7e, + 0xa1d67c91}, + {0x00000000, 0x5880e2d7, 0xf106b474, 0xa98656a3, 0xe20d68e9, + 0xba8d8a3e, 0x130bdc9d, 0x4b8b3e4a, 0x851da109, 0xdd9d43de, + 0x741b157d, 0x2c9bf7aa, 0x6710c9e0, 0x3f902b37, 0x96167d94, + 0xce969f43, 0x0a3b4213, 0x52bba0c4, 0xfb3df667, 0xa3bd14b0, + 0xe8362afa, 0xb0b6c82d, 0x19309e8e, 0x41b07c59, 0x8f26e31a, + 0xd7a601cd, 0x7e20576e, 0x26a0b5b9, 0x6d2b8bf3, 0x35ab6924, + 0x9c2d3f87, 0xc4addd50, 0x14768426, 0x4cf666f1, 0xe5703052, + 0xbdf0d285, 0xf67beccf, 0xaefb0e18, 0x077d58bb, 0x5ffdba6c, + 0x916b252f, 0xc9ebc7f8, 0x606d915b, 0x38ed738c, 0x73664dc6, + 0x2be6af11, 0x8260f9b2, 
0xdae01b65, 0x1e4dc635, 0x46cd24e2, + 0xef4b7241, 0xb7cb9096, 0xfc40aedc, 0xa4c04c0b, 0x0d461aa8, + 0x55c6f87f, 0x9b50673c, 0xc3d085eb, 0x6a56d348, 0x32d6319f, + 0x795d0fd5, 0x21dded02, 0x885bbba1, 0xd0db5976, 0x28ec084d, + 0x706cea9a, 0xd9eabc39, 0x816a5eee, 0xcae160a4, 0x92618273, + 0x3be7d4d0, 0x63673607, 0xadf1a944, 0xf5714b93, 0x5cf71d30, + 0x0477ffe7, 0x4ffcc1ad, 0x177c237a, 0xbefa75d9, 0xe67a970e, + 0x22d74a5e, 0x7a57a889, 0xd3d1fe2a, 0x8b511cfd, 0xc0da22b7, + 0x985ac060, 0x31dc96c3, 0x695c7414, 0xa7caeb57, 0xff4a0980, + 0x56cc5f23, 0x0e4cbdf4, 0x45c783be, 0x1d476169, 0xb4c137ca, + 0xec41d51d, 0x3c9a8c6b, 0x641a6ebc, 0xcd9c381f, 0x951cdac8, + 0xde97e482, 0x86170655, 0x2f9150f6, 0x7711b221, 0xb9872d62, + 0xe107cfb5, 0x48819916, 0x10017bc1, 0x5b8a458b, 0x030aa75c, + 0xaa8cf1ff, 0xf20c1328, 0x36a1ce78, 0x6e212caf, 0xc7a77a0c, + 0x9f2798db, 0xd4aca691, 0x8c2c4446, 0x25aa12e5, 0x7d2af032, + 0xb3bc6f71, 0xeb3c8da6, 0x42badb05, 0x1a3a39d2, 0x51b10798, + 0x0931e54f, 0xa0b7b3ec, 0xf837513b, 0x50d8119a, 0x0858f34d, + 0xa1dea5ee, 0xf95e4739, 0xb2d57973, 0xea559ba4, 0x43d3cd07, + 0x1b532fd0, 0xd5c5b093, 0x8d455244, 0x24c304e7, 0x7c43e630, + 0x37c8d87a, 0x6f483aad, 0xc6ce6c0e, 0x9e4e8ed9, 0x5ae35389, + 0x0263b15e, 0xabe5e7fd, 0xf365052a, 0xb8ee3b60, 0xe06ed9b7, + 0x49e88f14, 0x11686dc3, 0xdffef280, 0x877e1057, 0x2ef846f4, + 0x7678a423, 0x3df39a69, 0x657378be, 0xccf52e1d, 0x9475ccca, + 0x44ae95bc, 0x1c2e776b, 0xb5a821c8, 0xed28c31f, 0xa6a3fd55, + 0xfe231f82, 0x57a54921, 0x0f25abf6, 0xc1b334b5, 0x9933d662, + 0x30b580c1, 0x68356216, 0x23be5c5c, 0x7b3ebe8b, 0xd2b8e828, + 0x8a380aff, 0x4e95d7af, 0x16153578, 0xbf9363db, 0xe713810c, + 0xac98bf46, 0xf4185d91, 0x5d9e0b32, 0x051ee9e5, 0xcb8876a6, + 0x93089471, 0x3a8ec2d2, 0x620e2005, 0x29851e4f, 0x7105fc98, + 0xd883aa3b, 0x800348ec, 0x783419d7, 0x20b4fb00, 0x8932ada3, + 0xd1b24f74, 0x9a39713e, 0xc2b993e9, 0x6b3fc54a, 0x33bf279d, + 0xfd29b8de, 0xa5a95a09, 0x0c2f0caa, 0x54afee7d, 0x1f24d037, + 0x47a432e0, 0xee226443, 0xb6a28694, 0x720f5bc4, 0x2a8fb913, + 0x8309efb0, 0xdb890d67, 0x9002332d, 0xc882d1fa, 0x61048759, + 0x3984658e, 0xf712facd, 0xaf92181a, 0x06144eb9, 0x5e94ac6e, + 0x151f9224, 0x4d9f70f3, 0xe4192650, 0xbc99c487, 0x6c429df1, + 0x34c27f26, 0x9d442985, 0xc5c4cb52, 0x8e4ff518, 0xd6cf17cf, + 0x7f49416c, 0x27c9a3bb, 0xe95f3cf8, 0xb1dfde2f, 0x1859888c, + 0x40d96a5b, 0x0b525411, 0x53d2b6c6, 0xfa54e065, 0xa2d402b2, + 0x6679dfe2, 0x3ef93d35, 0x977f6b96, 0xcfff8941, 0x8474b70b, + 0xdcf455dc, 0x7572037f, 0x2df2e1a8, 0xe3647eeb, 0xbbe49c3c, + 0x1262ca9f, 0x4ae22848, 0x01691602, 0x59e9f4d5, 0xf06fa276, + 0xa8ef40a1}, + {0x00000000, 0x463b6765, 0x8c76ceca, 0xca4da9af, 0x59ebed4e, + 0x1fd08a2b, 0xd59d2384, 0x93a644e1, 0xb2d6db9d, 0xf4edbcf8, + 0x3ea01557, 0x789b7232, 0xeb3d36d3, 0xad0651b6, 0x674bf819, + 0x21709f7c, 0x25abc6e0, 0x6390a185, 0xa9dd082a, 0xefe66f4f, + 0x7c402bae, 0x3a7b4ccb, 0xf036e564, 0xb60d8201, 0x977d1d7d, + 0xd1467a18, 0x1b0bd3b7, 0x5d30b4d2, 0xce96f033, 0x88ad9756, + 0x42e03ef9, 0x04db599c, 0x0b50fc1a, 0x4d6b9b7f, 0x872632d0, + 0xc11d55b5, 0x52bb1154, 0x14807631, 0xdecddf9e, 0x98f6b8fb, + 0xb9862787, 0xffbd40e2, 0x35f0e94d, 0x73cb8e28, 0xe06dcac9, + 0xa656adac, 0x6c1b0403, 0x2a206366, 0x2efb3afa, 0x68c05d9f, + 0xa28df430, 0xe4b69355, 0x7710d7b4, 0x312bb0d1, 0xfb66197e, + 0xbd5d7e1b, 0x9c2de167, 0xda168602, 0x105b2fad, 0x566048c8, + 0xc5c60c29, 0x83fd6b4c, 0x49b0c2e3, 0x0f8ba586, 0x16a0f835, + 0x509b9f50, 0x9ad636ff, 0xdced519a, 0x4f4b157b, 0x0970721e, + 0xc33ddbb1, 0x8506bcd4, 0xa47623a8, 0xe24d44cd, 0x2800ed62, + 0x6e3b8a07, 0xfd9dcee6, 
0xbba6a983, 0x71eb002c, 0x37d06749, + 0x330b3ed5, 0x753059b0, 0xbf7df01f, 0xf946977a, 0x6ae0d39b, + 0x2cdbb4fe, 0xe6961d51, 0xa0ad7a34, 0x81dde548, 0xc7e6822d, + 0x0dab2b82, 0x4b904ce7, 0xd8360806, 0x9e0d6f63, 0x5440c6cc, + 0x127ba1a9, 0x1df0042f, 0x5bcb634a, 0x9186cae5, 0xd7bdad80, + 0x441be961, 0x02208e04, 0xc86d27ab, 0x8e5640ce, 0xaf26dfb2, + 0xe91db8d7, 0x23501178, 0x656b761d, 0xf6cd32fc, 0xb0f65599, + 0x7abbfc36, 0x3c809b53, 0x385bc2cf, 0x7e60a5aa, 0xb42d0c05, + 0xf2166b60, 0x61b02f81, 0x278b48e4, 0xedc6e14b, 0xabfd862e, + 0x8a8d1952, 0xccb67e37, 0x06fbd798, 0x40c0b0fd, 0xd366f41c, + 0x955d9379, 0x5f103ad6, 0x192b5db3, 0x2c40f16b, 0x6a7b960e, + 0xa0363fa1, 0xe60d58c4, 0x75ab1c25, 0x33907b40, 0xf9ddd2ef, + 0xbfe6b58a, 0x9e962af6, 0xd8ad4d93, 0x12e0e43c, 0x54db8359, + 0xc77dc7b8, 0x8146a0dd, 0x4b0b0972, 0x0d306e17, 0x09eb378b, + 0x4fd050ee, 0x859df941, 0xc3a69e24, 0x5000dac5, 0x163bbda0, + 0xdc76140f, 0x9a4d736a, 0xbb3dec16, 0xfd068b73, 0x374b22dc, + 0x717045b9, 0xe2d60158, 0xa4ed663d, 0x6ea0cf92, 0x289ba8f7, + 0x27100d71, 0x612b6a14, 0xab66c3bb, 0xed5da4de, 0x7efbe03f, + 0x38c0875a, 0xf28d2ef5, 0xb4b64990, 0x95c6d6ec, 0xd3fdb189, + 0x19b01826, 0x5f8b7f43, 0xcc2d3ba2, 0x8a165cc7, 0x405bf568, + 0x0660920d, 0x02bbcb91, 0x4480acf4, 0x8ecd055b, 0xc8f6623e, + 0x5b5026df, 0x1d6b41ba, 0xd726e815, 0x911d8f70, 0xb06d100c, + 0xf6567769, 0x3c1bdec6, 0x7a20b9a3, 0xe986fd42, 0xafbd9a27, + 0x65f03388, 0x23cb54ed, 0x3ae0095e, 0x7cdb6e3b, 0xb696c794, + 0xf0ada0f1, 0x630be410, 0x25308375, 0xef7d2ada, 0xa9464dbf, + 0x8836d2c3, 0xce0db5a6, 0x04401c09, 0x427b7b6c, 0xd1dd3f8d, + 0x97e658e8, 0x5dabf147, 0x1b909622, 0x1f4bcfbe, 0x5970a8db, + 0x933d0174, 0xd5066611, 0x46a022f0, 0x009b4595, 0xcad6ec3a, + 0x8ced8b5f, 0xad9d1423, 0xeba67346, 0x21ebdae9, 0x67d0bd8c, + 0xf476f96d, 0xb24d9e08, 0x780037a7, 0x3e3b50c2, 0x31b0f544, + 0x778b9221, 0xbdc63b8e, 0xfbfd5ceb, 0x685b180a, 0x2e607f6f, + 0xe42dd6c0, 0xa216b1a5, 0x83662ed9, 0xc55d49bc, 0x0f10e013, + 0x492b8776, 0xda8dc397, 0x9cb6a4f2, 0x56fb0d5d, 0x10c06a38, + 0x141b33a4, 0x522054c1, 0x986dfd6e, 0xde569a0b, 0x4df0deea, + 0x0bcbb98f, 0xc1861020, 0x87bd7745, 0xa6cde839, 0xe0f68f5c, + 0x2abb26f3, 0x6c804196, 0xff260577, 0xb91d6212, 0x7350cbbd, + 0x356bacd8}}; + +#endif + +#endif + +#if N == 6 + +#if W == 8 + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x3db1ecdc, 0x7b63d9b8, 0x46d23564, 0xf6c7b370, + 0xcb765fac, 0x8da46ac8, 0xb0158614, 0x36fe60a1, 0x0b4f8c7d, + 0x4d9db919, 0x702c55c5, 0xc039d3d1, 0xfd883f0d, 0xbb5a0a69, + 0x86ebe6b5, 0x6dfcc142, 0x504d2d9e, 0x169f18fa, 0x2b2ef426, + 0x9b3b7232, 0xa68a9eee, 0xe058ab8a, 0xdde94756, 0x5b02a1e3, + 0x66b34d3f, 0x2061785b, 0x1dd09487, 0xadc51293, 0x9074fe4f, + 0xd6a6cb2b, 0xeb1727f7, 0xdbf98284, 0xe6486e58, 0xa09a5b3c, + 0x9d2bb7e0, 0x2d3e31f4, 0x108fdd28, 0x565de84c, 0x6bec0490, + 0xed07e225, 0xd0b60ef9, 0x96643b9d, 0xabd5d741, 0x1bc05155, + 0x2671bd89, 0x60a388ed, 0x5d126431, 0xb60543c6, 0x8bb4af1a, + 0xcd669a7e, 0xf0d776a2, 0x40c2f0b6, 0x7d731c6a, 0x3ba1290e, + 0x0610c5d2, 0x80fb2367, 0xbd4acfbb, 0xfb98fadf, 0xc6291603, + 0x763c9017, 0x4b8d7ccb, 0x0d5f49af, 0x30eea573, 0x6c820349, + 0x5133ef95, 0x17e1daf1, 0x2a50362d, 0x9a45b039, 0xa7f45ce5, + 0xe1266981, 0xdc97855d, 0x5a7c63e8, 0x67cd8f34, 0x211fba50, + 0x1cae568c, 0xacbbd098, 0x910a3c44, 0xd7d80920, 0xea69e5fc, + 0x017ec20b, 0x3ccf2ed7, 0x7a1d1bb3, 0x47acf76f, 0xf7b9717b, + 0xca089da7, 0x8cdaa8c3, 0xb16b441f, 0x3780a2aa, 0x0a314e76, + 0x4ce37b12, 0x715297ce, 0xc14711da, 0xfcf6fd06, 0xba24c862, + 0x879524be, 0xb77b81cd, 0x8aca6d11, 0xcc185875, 
0xf1a9b4a9, + 0x41bc32bd, 0x7c0dde61, 0x3adfeb05, 0x076e07d9, 0x8185e16c, + 0xbc340db0, 0xfae638d4, 0xc757d408, 0x7742521c, 0x4af3bec0, + 0x0c218ba4, 0x31906778, 0xda87408f, 0xe736ac53, 0xa1e49937, + 0x9c5575eb, 0x2c40f3ff, 0x11f11f23, 0x57232a47, 0x6a92c69b, + 0xec79202e, 0xd1c8ccf2, 0x971af996, 0xaaab154a, 0x1abe935e, + 0x270f7f82, 0x61dd4ae6, 0x5c6ca63a, 0xd9040692, 0xe4b5ea4e, + 0xa267df2a, 0x9fd633f6, 0x2fc3b5e2, 0x1272593e, 0x54a06c5a, + 0x69118086, 0xeffa6633, 0xd24b8aef, 0x9499bf8b, 0xa9285357, + 0x193dd543, 0x248c399f, 0x625e0cfb, 0x5fefe027, 0xb4f8c7d0, + 0x89492b0c, 0xcf9b1e68, 0xf22af2b4, 0x423f74a0, 0x7f8e987c, + 0x395cad18, 0x04ed41c4, 0x8206a771, 0xbfb74bad, 0xf9657ec9, + 0xc4d49215, 0x74c11401, 0x4970f8dd, 0x0fa2cdb9, 0x32132165, + 0x02fd8416, 0x3f4c68ca, 0x799e5dae, 0x442fb172, 0xf43a3766, + 0xc98bdbba, 0x8f59eede, 0xb2e80202, 0x3403e4b7, 0x09b2086b, + 0x4f603d0f, 0x72d1d1d3, 0xc2c457c7, 0xff75bb1b, 0xb9a78e7f, + 0x841662a3, 0x6f014554, 0x52b0a988, 0x14629cec, 0x29d37030, + 0x99c6f624, 0xa4771af8, 0xe2a52f9c, 0xdf14c340, 0x59ff25f5, + 0x644ec929, 0x229cfc4d, 0x1f2d1091, 0xaf389685, 0x92897a59, + 0xd45b4f3d, 0xe9eaa3e1, 0xb58605db, 0x8837e907, 0xcee5dc63, + 0xf35430bf, 0x4341b6ab, 0x7ef05a77, 0x38226f13, 0x059383cf, + 0x8378657a, 0xbec989a6, 0xf81bbcc2, 0xc5aa501e, 0x75bfd60a, + 0x480e3ad6, 0x0edc0fb2, 0x336de36e, 0xd87ac499, 0xe5cb2845, + 0xa3191d21, 0x9ea8f1fd, 0x2ebd77e9, 0x130c9b35, 0x55deae51, + 0x686f428d, 0xee84a438, 0xd33548e4, 0x95e77d80, 0xa856915c, + 0x18431748, 0x25f2fb94, 0x6320cef0, 0x5e91222c, 0x6e7f875f, + 0x53ce6b83, 0x151c5ee7, 0x28adb23b, 0x98b8342f, 0xa509d8f3, + 0xe3dbed97, 0xde6a014b, 0x5881e7fe, 0x65300b22, 0x23e23e46, + 0x1e53d29a, 0xae46548e, 0x93f7b852, 0xd5258d36, 0xe89461ea, + 0x0383461d, 0x3e32aac1, 0x78e09fa5, 0x45517379, 0xf544f56d, + 0xc8f519b1, 0x8e272cd5, 0xb396c009, 0x357d26bc, 0x08ccca60, + 0x4e1eff04, 0x73af13d8, 0xc3ba95cc, 0xfe0b7910, 0xb8d94c74, + 0x8568a0a8}, + {0x00000000, 0x69790b65, 0xd2f216ca, 0xbb8b1daf, 0x7e952bd5, + 0x17ec20b0, 0xac673d1f, 0xc51e367a, 0xfd2a57aa, 0x94535ccf, + 0x2fd84160, 0x46a14a05, 0x83bf7c7f, 0xeac6771a, 0x514d6ab5, + 0x383461d0, 0x2125a915, 0x485ca270, 0xf3d7bfdf, 0x9aaeb4ba, + 0x5fb082c0, 0x36c989a5, 0x8d42940a, 0xe43b9f6f, 0xdc0ffebf, + 0xb576f5da, 0x0efde875, 0x6784e310, 0xa29ad56a, 0xcbe3de0f, + 0x7068c3a0, 0x1911c8c5, 0x424b522a, 0x2b32594f, 0x90b944e0, + 0xf9c04f85, 0x3cde79ff, 0x55a7729a, 0xee2c6f35, 0x87556450, + 0xbf610580, 0xd6180ee5, 0x6d93134a, 0x04ea182f, 0xc1f42e55, + 0xa88d2530, 0x1306389f, 0x7a7f33fa, 0x636efb3f, 0x0a17f05a, + 0xb19cedf5, 0xd8e5e690, 0x1dfbd0ea, 0x7482db8f, 0xcf09c620, + 0xa670cd45, 0x9e44ac95, 0xf73da7f0, 0x4cb6ba5f, 0x25cfb13a, + 0xe0d18740, 0x89a88c25, 0x3223918a, 0x5b5a9aef, 0x8496a454, + 0xedefaf31, 0x5664b29e, 0x3f1db9fb, 0xfa038f81, 0x937a84e4, + 0x28f1994b, 0x4188922e, 0x79bcf3fe, 0x10c5f89b, 0xab4ee534, + 0xc237ee51, 0x0729d82b, 0x6e50d34e, 0xd5dbcee1, 0xbca2c584, + 0xa5b30d41, 0xccca0624, 0x77411b8b, 0x1e3810ee, 0xdb262694, + 0xb25f2df1, 0x09d4305e, 0x60ad3b3b, 0x58995aeb, 0x31e0518e, + 0x8a6b4c21, 0xe3124744, 0x260c713e, 0x4f757a5b, 0xf4fe67f4, + 0x9d876c91, 0xc6ddf67e, 0xafa4fd1b, 0x142fe0b4, 0x7d56ebd1, + 0xb848ddab, 0xd131d6ce, 0x6abacb61, 0x03c3c004, 0x3bf7a1d4, + 0x528eaab1, 0xe905b71e, 0x807cbc7b, 0x45628a01, 0x2c1b8164, + 0x97909ccb, 0xfee997ae, 0xe7f85f6b, 0x8e81540e, 0x350a49a1, + 0x5c7342c4, 0x996d74be, 0xf0147fdb, 0x4b9f6274, 0x22e66911, + 0x1ad208c1, 0x73ab03a4, 0xc8201e0b, 0xa159156e, 0x64472314, + 0x0d3e2871, 0xb6b535de, 0xdfcc3ebb, 0xd25c4ee9, 
0xbb25458c, + 0x00ae5823, 0x69d75346, 0xacc9653c, 0xc5b06e59, 0x7e3b73f6, + 0x17427893, 0x2f761943, 0x460f1226, 0xfd840f89, 0x94fd04ec, + 0x51e33296, 0x389a39f3, 0x8311245c, 0xea682f39, 0xf379e7fc, + 0x9a00ec99, 0x218bf136, 0x48f2fa53, 0x8deccc29, 0xe495c74c, + 0x5f1edae3, 0x3667d186, 0x0e53b056, 0x672abb33, 0xdca1a69c, + 0xb5d8adf9, 0x70c69b83, 0x19bf90e6, 0xa2348d49, 0xcb4d862c, + 0x90171cc3, 0xf96e17a6, 0x42e50a09, 0x2b9c016c, 0xee823716, + 0x87fb3c73, 0x3c7021dc, 0x55092ab9, 0x6d3d4b69, 0x0444400c, + 0xbfcf5da3, 0xd6b656c6, 0x13a860bc, 0x7ad16bd9, 0xc15a7676, + 0xa8237d13, 0xb132b5d6, 0xd84bbeb3, 0x63c0a31c, 0x0ab9a879, + 0xcfa79e03, 0xa6de9566, 0x1d5588c9, 0x742c83ac, 0x4c18e27c, + 0x2561e919, 0x9eeaf4b6, 0xf793ffd3, 0x328dc9a9, 0x5bf4c2cc, + 0xe07fdf63, 0x8906d406, 0x56caeabd, 0x3fb3e1d8, 0x8438fc77, + 0xed41f712, 0x285fc168, 0x4126ca0d, 0xfaadd7a2, 0x93d4dcc7, + 0xabe0bd17, 0xc299b672, 0x7912abdd, 0x106ba0b8, 0xd57596c2, + 0xbc0c9da7, 0x07878008, 0x6efe8b6d, 0x77ef43a8, 0x1e9648cd, + 0xa51d5562, 0xcc645e07, 0x097a687d, 0x60036318, 0xdb887eb7, + 0xb2f175d2, 0x8ac51402, 0xe3bc1f67, 0x583702c8, 0x314e09ad, + 0xf4503fd7, 0x9d2934b2, 0x26a2291d, 0x4fdb2278, 0x1481b897, + 0x7df8b3f2, 0xc673ae5d, 0xaf0aa538, 0x6a149342, 0x036d9827, + 0xb8e68588, 0xd19f8eed, 0xe9abef3d, 0x80d2e458, 0x3b59f9f7, + 0x5220f292, 0x973ec4e8, 0xfe47cf8d, 0x45ccd222, 0x2cb5d947, + 0x35a41182, 0x5cdd1ae7, 0xe7560748, 0x8e2f0c2d, 0x4b313a57, + 0x22483132, 0x99c32c9d, 0xf0ba27f8, 0xc88e4628, 0xa1f74d4d, + 0x1a7c50e2, 0x73055b87, 0xb61b6dfd, 0xdf626698, 0x64e97b37, + 0x0d907052}, + {0x00000000, 0x7fc99b93, 0xff933726, 0x805aacb5, 0x2457680d, + 0x5b9ef39e, 0xdbc45f2b, 0xa40dc4b8, 0x48aed01a, 0x37674b89, + 0xb73de73c, 0xc8f47caf, 0x6cf9b817, 0x13302384, 0x936a8f31, + 0xeca314a2, 0x915da034, 0xee943ba7, 0x6ece9712, 0x11070c81, + 0xb50ac839, 0xcac353aa, 0x4a99ff1f, 0x3550648c, 0xd9f3702e, + 0xa63aebbd, 0x26604708, 0x59a9dc9b, 0xfda41823, 0x826d83b0, + 0x02372f05, 0x7dfeb496, 0xf9ca4629, 0x8603ddba, 0x0659710f, + 0x7990ea9c, 0xdd9d2e24, 0xa254b5b7, 0x220e1902, 0x5dc78291, + 0xb1649633, 0xcead0da0, 0x4ef7a115, 0x313e3a86, 0x9533fe3e, + 0xeafa65ad, 0x6aa0c918, 0x1569528b, 0x6897e61d, 0x175e7d8e, + 0x9704d13b, 0xe8cd4aa8, 0x4cc08e10, 0x33091583, 0xb353b936, + 0xcc9a22a5, 0x20393607, 0x5ff0ad94, 0xdfaa0121, 0xa0639ab2, + 0x046e5e0a, 0x7ba7c599, 0xfbfd692c, 0x8434f2bf, 0x28e58a13, + 0x572c1180, 0xd776bd35, 0xa8bf26a6, 0x0cb2e21e, 0x737b798d, + 0xf321d538, 0x8ce84eab, 0x604b5a09, 0x1f82c19a, 0x9fd86d2f, + 0xe011f6bc, 0x441c3204, 0x3bd5a997, 0xbb8f0522, 0xc4469eb1, + 0xb9b82a27, 0xc671b1b4, 0x462b1d01, 0x39e28692, 0x9def422a, + 0xe226d9b9, 0x627c750c, 0x1db5ee9f, 0xf116fa3d, 0x8edf61ae, + 0x0e85cd1b, 0x714c5688, 0xd5419230, 0xaa8809a3, 0x2ad2a516, + 0x551b3e85, 0xd12fcc3a, 0xaee657a9, 0x2ebcfb1c, 0x5175608f, + 0xf578a437, 0x8ab13fa4, 0x0aeb9311, 0x75220882, 0x99811c20, + 0xe64887b3, 0x66122b06, 0x19dbb095, 0xbdd6742d, 0xc21fefbe, + 0x4245430b, 0x3d8cd898, 0x40726c0e, 0x3fbbf79d, 0xbfe15b28, + 0xc028c0bb, 0x64250403, 0x1bec9f90, 0x9bb63325, 0xe47fa8b6, + 0x08dcbc14, 0x77152787, 0xf74f8b32, 0x888610a1, 0x2c8bd419, + 0x53424f8a, 0xd318e33f, 0xacd178ac, 0x51cb1426, 0x2e028fb5, + 0xae582300, 0xd191b893, 0x759c7c2b, 0x0a55e7b8, 0x8a0f4b0d, + 0xf5c6d09e, 0x1965c43c, 0x66ac5faf, 0xe6f6f31a, 0x993f6889, + 0x3d32ac31, 0x42fb37a2, 0xc2a19b17, 0xbd680084, 0xc096b412, + 0xbf5f2f81, 0x3f058334, 0x40cc18a7, 0xe4c1dc1f, 0x9b08478c, + 0x1b52eb39, 0x649b70aa, 0x88386408, 0xf7f1ff9b, 0x77ab532e, + 0x0862c8bd, 0xac6f0c05, 0xd3a69796, 0x53fc3b23, 
0x2c35a0b0, + 0xa801520f, 0xd7c8c99c, 0x57926529, 0x285bfeba, 0x8c563a02, + 0xf39fa191, 0x73c50d24, 0x0c0c96b7, 0xe0af8215, 0x9f661986, + 0x1f3cb533, 0x60f52ea0, 0xc4f8ea18, 0xbb31718b, 0x3b6bdd3e, + 0x44a246ad, 0x395cf23b, 0x469569a8, 0xc6cfc51d, 0xb9065e8e, + 0x1d0b9a36, 0x62c201a5, 0xe298ad10, 0x9d513683, 0x71f22221, + 0x0e3bb9b2, 0x8e611507, 0xf1a88e94, 0x55a54a2c, 0x2a6cd1bf, + 0xaa367d0a, 0xd5ffe699, 0x792e9e35, 0x06e705a6, 0x86bda913, + 0xf9743280, 0x5d79f638, 0x22b06dab, 0xa2eac11e, 0xdd235a8d, + 0x31804e2f, 0x4e49d5bc, 0xce137909, 0xb1dae29a, 0x15d72622, + 0x6a1ebdb1, 0xea441104, 0x958d8a97, 0xe8733e01, 0x97baa592, + 0x17e00927, 0x682992b4, 0xcc24560c, 0xb3edcd9f, 0x33b7612a, + 0x4c7efab9, 0xa0ddee1b, 0xdf147588, 0x5f4ed93d, 0x208742ae, + 0x848a8616, 0xfb431d85, 0x7b19b130, 0x04d02aa3, 0x80e4d81c, + 0xff2d438f, 0x7f77ef3a, 0x00be74a9, 0xa4b3b011, 0xdb7a2b82, + 0x5b208737, 0x24e91ca4, 0xc84a0806, 0xb7839395, 0x37d93f20, + 0x4810a4b3, 0xec1d600b, 0x93d4fb98, 0x138e572d, 0x6c47ccbe, + 0x11b97828, 0x6e70e3bb, 0xee2a4f0e, 0x91e3d49d, 0x35ee1025, + 0x4a278bb6, 0xca7d2703, 0xb5b4bc90, 0x5917a832, 0x26de33a1, + 0xa6849f14, 0xd94d0487, 0x7d40c03f, 0x02895bac, 0x82d3f719, + 0xfd1a6c8a}, + {0x00000000, 0xa396284c, 0x9c5d56d9, 0x3fcb7e95, 0xe3cbabf3, + 0x405d83bf, 0x7f96fd2a, 0xdc00d566, 0x1ce651a7, 0xbf7079eb, + 0x80bb077e, 0x232d2f32, 0xff2dfa54, 0x5cbbd218, 0x6370ac8d, + 0xc0e684c1, 0x39cca34e, 0x9a5a8b02, 0xa591f597, 0x0607dddb, + 0xda0708bd, 0x799120f1, 0x465a5e64, 0xe5cc7628, 0x252af2e9, + 0x86bcdaa5, 0xb977a430, 0x1ae18c7c, 0xc6e1591a, 0x65777156, + 0x5abc0fc3, 0xf92a278f, 0x7399469c, 0xd00f6ed0, 0xefc41045, + 0x4c523809, 0x9052ed6f, 0x33c4c523, 0x0c0fbbb6, 0xaf9993fa, + 0x6f7f173b, 0xcce93f77, 0xf32241e2, 0x50b469ae, 0x8cb4bcc8, + 0x2f229484, 0x10e9ea11, 0xb37fc25d, 0x4a55e5d2, 0xe9c3cd9e, + 0xd608b30b, 0x759e9b47, 0xa99e4e21, 0x0a08666d, 0x35c318f8, + 0x965530b4, 0x56b3b475, 0xf5259c39, 0xcaeee2ac, 0x6978cae0, + 0xb5781f86, 0x16ee37ca, 0x2925495f, 0x8ab36113, 0xe7328d38, + 0x44a4a574, 0x7b6fdbe1, 0xd8f9f3ad, 0x04f926cb, 0xa76f0e87, + 0x98a47012, 0x3b32585e, 0xfbd4dc9f, 0x5842f4d3, 0x67898a46, + 0xc41fa20a, 0x181f776c, 0xbb895f20, 0x844221b5, 0x27d409f9, + 0xdefe2e76, 0x7d68063a, 0x42a378af, 0xe13550e3, 0x3d358585, + 0x9ea3adc9, 0xa168d35c, 0x02fefb10, 0xc2187fd1, 0x618e579d, + 0x5e452908, 0xfdd30144, 0x21d3d422, 0x8245fc6e, 0xbd8e82fb, + 0x1e18aab7, 0x94abcba4, 0x373de3e8, 0x08f69d7d, 0xab60b531, + 0x77606057, 0xd4f6481b, 0xeb3d368e, 0x48ab1ec2, 0x884d9a03, + 0x2bdbb24f, 0x1410ccda, 0xb786e496, 0x6b8631f0, 0xc81019bc, + 0xf7db6729, 0x544d4f65, 0xad6768ea, 0x0ef140a6, 0x313a3e33, + 0x92ac167f, 0x4eacc319, 0xed3aeb55, 0xd2f195c0, 0x7167bd8c, + 0xb181394d, 0x12171101, 0x2ddc6f94, 0x8e4a47d8, 0x524a92be, + 0xf1dcbaf2, 0xce17c467, 0x6d81ec2b, 0x15141c31, 0xb682347d, + 0x89494ae8, 0x2adf62a4, 0xf6dfb7c2, 0x55499f8e, 0x6a82e11b, + 0xc914c957, 0x09f24d96, 0xaa6465da, 0x95af1b4f, 0x36393303, + 0xea39e665, 0x49afce29, 0x7664b0bc, 0xd5f298f0, 0x2cd8bf7f, + 0x8f4e9733, 0xb085e9a6, 0x1313c1ea, 0xcf13148c, 0x6c853cc0, + 0x534e4255, 0xf0d86a19, 0x303eeed8, 0x93a8c694, 0xac63b801, + 0x0ff5904d, 0xd3f5452b, 0x70636d67, 0x4fa813f2, 0xec3e3bbe, + 0x668d5aad, 0xc51b72e1, 0xfad00c74, 0x59462438, 0x8546f15e, + 0x26d0d912, 0x191ba787, 0xba8d8fcb, 0x7a6b0b0a, 0xd9fd2346, + 0xe6365dd3, 0x45a0759f, 0x99a0a0f9, 0x3a3688b5, 0x05fdf620, + 0xa66bde6c, 0x5f41f9e3, 0xfcd7d1af, 0xc31caf3a, 0x608a8776, + 0xbc8a5210, 0x1f1c7a5c, 0x20d704c9, 0x83412c85, 0x43a7a844, + 0xe0318008, 0xdffafe9d, 0x7c6cd6d1, 0xa06c03b7, 
0x03fa2bfb, + 0x3c31556e, 0x9fa77d22, 0xf2269109, 0x51b0b945, 0x6e7bc7d0, + 0xcdedef9c, 0x11ed3afa, 0xb27b12b6, 0x8db06c23, 0x2e26446f, + 0xeec0c0ae, 0x4d56e8e2, 0x729d9677, 0xd10bbe3b, 0x0d0b6b5d, + 0xae9d4311, 0x91563d84, 0x32c015c8, 0xcbea3247, 0x687c1a0b, + 0x57b7649e, 0xf4214cd2, 0x282199b4, 0x8bb7b1f8, 0xb47ccf6d, + 0x17eae721, 0xd70c63e0, 0x749a4bac, 0x4b513539, 0xe8c71d75, + 0x34c7c813, 0x9751e05f, 0xa89a9eca, 0x0b0cb686, 0x81bfd795, + 0x2229ffd9, 0x1de2814c, 0xbe74a900, 0x62747c66, 0xc1e2542a, + 0xfe292abf, 0x5dbf02f3, 0x9d598632, 0x3ecfae7e, 0x0104d0eb, + 0xa292f8a7, 0x7e922dc1, 0xdd04058d, 0xe2cf7b18, 0x41595354, + 0xb87374db, 0x1be55c97, 0x242e2202, 0x87b80a4e, 0x5bb8df28, + 0xf82ef764, 0xc7e589f1, 0x6473a1bd, 0xa495257c, 0x07030d30, + 0x38c873a5, 0x9b5e5be9, 0x475e8e8f, 0xe4c8a6c3, 0xdb03d856, + 0x7895f01a}, + {0x00000000, 0x2a283862, 0x545070c4, 0x7e7848a6, 0xa8a0e188, + 0x8288d9ea, 0xfcf0914c, 0xd6d8a92e, 0x8a30c551, 0xa018fd33, + 0xde60b595, 0xf4488df7, 0x229024d9, 0x08b81cbb, 0x76c0541d, + 0x5ce86c7f, 0xcf108ce3, 0xe538b481, 0x9b40fc27, 0xb168c445, + 0x67b06d6b, 0x4d985509, 0x33e01daf, 0x19c825cd, 0x452049b2, + 0x6f0871d0, 0x11703976, 0x3b580114, 0xed80a83a, 0xc7a89058, + 0xb9d0d8fe, 0x93f8e09c, 0x45501f87, 0x6f7827e5, 0x11006f43, + 0x3b285721, 0xedf0fe0f, 0xc7d8c66d, 0xb9a08ecb, 0x9388b6a9, + 0xcf60dad6, 0xe548e2b4, 0x9b30aa12, 0xb1189270, 0x67c03b5e, + 0x4de8033c, 0x33904b9a, 0x19b873f8, 0x8a409364, 0xa068ab06, + 0xde10e3a0, 0xf438dbc2, 0x22e072ec, 0x08c84a8e, 0x76b00228, + 0x5c983a4a, 0x00705635, 0x2a586e57, 0x542026f1, 0x7e081e93, + 0xa8d0b7bd, 0x82f88fdf, 0xfc80c779, 0xd6a8ff1b, 0x8aa03f0e, + 0xa088076c, 0xdef04fca, 0xf4d877a8, 0x2200de86, 0x0828e6e4, + 0x7650ae42, 0x5c789620, 0x0090fa5f, 0x2ab8c23d, 0x54c08a9b, + 0x7ee8b2f9, 0xa8301bd7, 0x821823b5, 0xfc606b13, 0xd6485371, + 0x45b0b3ed, 0x6f988b8f, 0x11e0c329, 0x3bc8fb4b, 0xed105265, + 0xc7386a07, 0xb94022a1, 0x93681ac3, 0xcf8076bc, 0xe5a84ede, + 0x9bd00678, 0xb1f83e1a, 0x67209734, 0x4d08af56, 0x3370e7f0, + 0x1958df92, 0xcff02089, 0xe5d818eb, 0x9ba0504d, 0xb188682f, + 0x6750c101, 0x4d78f963, 0x3300b1c5, 0x192889a7, 0x45c0e5d8, + 0x6fe8ddba, 0x1190951c, 0x3bb8ad7e, 0xed600450, 0xc7483c32, + 0xb9307494, 0x93184cf6, 0x00e0ac6a, 0x2ac89408, 0x54b0dcae, + 0x7e98e4cc, 0xa8404de2, 0x82687580, 0xfc103d26, 0xd6380544, + 0x8ad0693b, 0xa0f85159, 0xde8019ff, 0xf4a8219d, 0x227088b3, + 0x0858b0d1, 0x7620f877, 0x5c08c015, 0xce31785d, 0xe419403f, + 0x9a610899, 0xb04930fb, 0x669199d5, 0x4cb9a1b7, 0x32c1e911, + 0x18e9d173, 0x4401bd0c, 0x6e29856e, 0x1051cdc8, 0x3a79f5aa, + 0xeca15c84, 0xc68964e6, 0xb8f12c40, 0x92d91422, 0x0121f4be, + 0x2b09ccdc, 0x5571847a, 0x7f59bc18, 0xa9811536, 0x83a92d54, + 0xfdd165f2, 0xd7f95d90, 0x8b1131ef, 0xa139098d, 0xdf41412b, + 0xf5697949, 0x23b1d067, 0x0999e805, 0x77e1a0a3, 0x5dc998c1, + 0x8b6167da, 0xa1495fb8, 0xdf31171e, 0xf5192f7c, 0x23c18652, + 0x09e9be30, 0x7791f696, 0x5db9cef4, 0x0151a28b, 0x2b799ae9, + 0x5501d24f, 0x7f29ea2d, 0xa9f14303, 0x83d97b61, 0xfda133c7, + 0xd7890ba5, 0x4471eb39, 0x6e59d35b, 0x10219bfd, 0x3a09a39f, + 0xecd10ab1, 0xc6f932d3, 0xb8817a75, 0x92a94217, 0xce412e68, + 0xe469160a, 0x9a115eac, 0xb03966ce, 0x66e1cfe0, 0x4cc9f782, + 0x32b1bf24, 0x18998746, 0x44914753, 0x6eb97f31, 0x10c13797, + 0x3ae90ff5, 0xec31a6db, 0xc6199eb9, 0xb861d61f, 0x9249ee7d, + 0xcea18202, 0xe489ba60, 0x9af1f2c6, 0xb0d9caa4, 0x6601638a, + 0x4c295be8, 0x3251134e, 0x18792b2c, 0x8b81cbb0, 0xa1a9f3d2, + 0xdfd1bb74, 0xf5f98316, 0x23212a38, 0x0909125a, 0x77715afc, + 0x5d59629e, 0x01b10ee1, 0x2b993683, 0x55e17e25, 
0x7fc94647, + 0xa911ef69, 0x8339d70b, 0xfd419fad, 0xd769a7cf, 0x01c158d4, + 0x2be960b6, 0x55912810, 0x7fb91072, 0xa961b95c, 0x8349813e, + 0xfd31c998, 0xd719f1fa, 0x8bf19d85, 0xa1d9a5e7, 0xdfa1ed41, + 0xf589d523, 0x23517c0d, 0x0979446f, 0x77010cc9, 0x5d2934ab, + 0xced1d437, 0xe4f9ec55, 0x9a81a4f3, 0xb0a99c91, 0x667135bf, + 0x4c590ddd, 0x3221457b, 0x18097d19, 0x44e11166, 0x6ec92904, + 0x10b161a2, 0x3a9959c0, 0xec41f0ee, 0xc669c88c, 0xb811802a, + 0x9239b848}, + {0x00000000, 0x4713f6fb, 0x8e27edf6, 0xc9341b0d, 0xc73eddad, + 0x802d2b56, 0x4919305b, 0x0e0ac6a0, 0x550cbd1b, 0x121f4be0, + 0xdb2b50ed, 0x9c38a616, 0x923260b6, 0xd521964d, 0x1c158d40, + 0x5b067bbb, 0xaa197a36, 0xed0a8ccd, 0x243e97c0, 0x632d613b, + 0x6d27a79b, 0x2a345160, 0xe3004a6d, 0xa413bc96, 0xff15c72d, + 0xb80631d6, 0x71322adb, 0x3621dc20, 0x382b1a80, 0x7f38ec7b, + 0xb60cf776, 0xf11f018d, 0x8f43f22d, 0xc85004d6, 0x01641fdb, + 0x4677e920, 0x487d2f80, 0x0f6ed97b, 0xc65ac276, 0x8149348d, + 0xda4f4f36, 0x9d5cb9cd, 0x5468a2c0, 0x137b543b, 0x1d71929b, + 0x5a626460, 0x93567f6d, 0xd4458996, 0x255a881b, 0x62497ee0, + 0xab7d65ed, 0xec6e9316, 0xe26455b6, 0xa577a34d, 0x6c43b840, + 0x2b504ebb, 0x70563500, 0x3745c3fb, 0xfe71d8f6, 0xb9622e0d, + 0xb768e8ad, 0xf07b1e56, 0x394f055b, 0x7e5cf3a0, 0xc5f6e21b, + 0x82e514e0, 0x4bd10fed, 0x0cc2f916, 0x02c83fb6, 0x45dbc94d, + 0x8cefd240, 0xcbfc24bb, 0x90fa5f00, 0xd7e9a9fb, 0x1eddb2f6, + 0x59ce440d, 0x57c482ad, 0x10d77456, 0xd9e36f5b, 0x9ef099a0, + 0x6fef982d, 0x28fc6ed6, 0xe1c875db, 0xa6db8320, 0xa8d14580, + 0xefc2b37b, 0x26f6a876, 0x61e55e8d, 0x3ae32536, 0x7df0d3cd, + 0xb4c4c8c0, 0xf3d73e3b, 0xfdddf89b, 0xbace0e60, 0x73fa156d, + 0x34e9e396, 0x4ab51036, 0x0da6e6cd, 0xc492fdc0, 0x83810b3b, + 0x8d8bcd9b, 0xca983b60, 0x03ac206d, 0x44bfd696, 0x1fb9ad2d, + 0x58aa5bd6, 0x919e40db, 0xd68db620, 0xd8877080, 0x9f94867b, + 0x56a09d76, 0x11b36b8d, 0xe0ac6a00, 0xa7bf9cfb, 0x6e8b87f6, + 0x2998710d, 0x2792b7ad, 0x60814156, 0xa9b55a5b, 0xeea6aca0, + 0xb5a0d71b, 0xf2b321e0, 0x3b873aed, 0x7c94cc16, 0x729e0ab6, + 0x358dfc4d, 0xfcb9e740, 0xbbaa11bb, 0x509cc277, 0x178f348c, + 0xdebb2f81, 0x99a8d97a, 0x97a21fda, 0xd0b1e921, 0x1985f22c, + 0x5e9604d7, 0x05907f6c, 0x42838997, 0x8bb7929a, 0xcca46461, + 0xc2aea2c1, 0x85bd543a, 0x4c894f37, 0x0b9ab9cc, 0xfa85b841, + 0xbd964eba, 0x74a255b7, 0x33b1a34c, 0x3dbb65ec, 0x7aa89317, + 0xb39c881a, 0xf48f7ee1, 0xaf89055a, 0xe89af3a1, 0x21aee8ac, + 0x66bd1e57, 0x68b7d8f7, 0x2fa42e0c, 0xe6903501, 0xa183c3fa, + 0xdfdf305a, 0x98ccc6a1, 0x51f8ddac, 0x16eb2b57, 0x18e1edf7, + 0x5ff21b0c, 0x96c60001, 0xd1d5f6fa, 0x8ad38d41, 0xcdc07bba, + 0x04f460b7, 0x43e7964c, 0x4ded50ec, 0x0afea617, 0xc3cabd1a, + 0x84d94be1, 0x75c64a6c, 0x32d5bc97, 0xfbe1a79a, 0xbcf25161, + 0xb2f897c1, 0xf5eb613a, 0x3cdf7a37, 0x7bcc8ccc, 0x20caf777, + 0x67d9018c, 0xaeed1a81, 0xe9feec7a, 0xe7f42ada, 0xa0e7dc21, + 0x69d3c72c, 0x2ec031d7, 0x956a206c, 0xd279d697, 0x1b4dcd9a, + 0x5c5e3b61, 0x5254fdc1, 0x15470b3a, 0xdc731037, 0x9b60e6cc, + 0xc0669d77, 0x87756b8c, 0x4e417081, 0x0952867a, 0x075840da, + 0x404bb621, 0x897fad2c, 0xce6c5bd7, 0x3f735a5a, 0x7860aca1, + 0xb154b7ac, 0xf6474157, 0xf84d87f7, 0xbf5e710c, 0x766a6a01, + 0x31799cfa, 0x6a7fe741, 0x2d6c11ba, 0xe4580ab7, 0xa34bfc4c, + 0xad413aec, 0xea52cc17, 0x2366d71a, 0x647521e1, 0x1a29d241, + 0x5d3a24ba, 0x940e3fb7, 0xd31dc94c, 0xdd170fec, 0x9a04f917, + 0x5330e21a, 0x142314e1, 0x4f256f5a, 0x083699a1, 0xc10282ac, + 0x86117457, 0x881bb2f7, 0xcf08440c, 0x063c5f01, 0x412fa9fa, + 0xb030a877, 0xf7235e8c, 0x3e174581, 0x7904b37a, 0x770e75da, + 0x301d8321, 0xf929982c, 0xbe3a6ed7, 0xe53c156c, 
0xa22fe397, + 0x6b1bf89a, 0x2c080e61, 0x2202c8c1, 0x65113e3a, 0xac252537, + 0xeb36d3cc}, + {0x00000000, 0xa13984ee, 0x99020f9d, 0x383b8b73, 0xe975197b, + 0x484c9d95, 0x707716e6, 0xd14e9208, 0x099b34b7, 0xa8a2b059, + 0x90993b2a, 0x31a0bfc4, 0xe0ee2dcc, 0x41d7a922, 0x79ec2251, + 0xd8d5a6bf, 0x1336696e, 0xb20fed80, 0x8a3466f3, 0x2b0de21d, + 0xfa437015, 0x5b7af4fb, 0x63417f88, 0xc278fb66, 0x1aad5dd9, + 0xbb94d937, 0x83af5244, 0x2296d6aa, 0xf3d844a2, 0x52e1c04c, + 0x6ada4b3f, 0xcbe3cfd1, 0x266cd2dc, 0x87555632, 0xbf6edd41, + 0x1e5759af, 0xcf19cba7, 0x6e204f49, 0x561bc43a, 0xf72240d4, + 0x2ff7e66b, 0x8ece6285, 0xb6f5e9f6, 0x17cc6d18, 0xc682ff10, + 0x67bb7bfe, 0x5f80f08d, 0xfeb97463, 0x355abbb2, 0x94633f5c, + 0xac58b42f, 0x0d6130c1, 0xdc2fa2c9, 0x7d162627, 0x452dad54, + 0xe41429ba, 0x3cc18f05, 0x9df80beb, 0xa5c38098, 0x04fa0476, + 0xd5b4967e, 0x748d1290, 0x4cb699e3, 0xed8f1d0d, 0x4cd9a5b8, + 0xede02156, 0xd5dbaa25, 0x74e22ecb, 0xa5acbcc3, 0x0495382d, + 0x3caeb35e, 0x9d9737b0, 0x4542910f, 0xe47b15e1, 0xdc409e92, + 0x7d791a7c, 0xac378874, 0x0d0e0c9a, 0x353587e9, 0x940c0307, + 0x5fefccd6, 0xfed64838, 0xc6edc34b, 0x67d447a5, 0xb69ad5ad, + 0x17a35143, 0x2f98da30, 0x8ea15ede, 0x5674f861, 0xf74d7c8f, + 0xcf76f7fc, 0x6e4f7312, 0xbf01e11a, 0x1e3865f4, 0x2603ee87, + 0x873a6a69, 0x6ab57764, 0xcb8cf38a, 0xf3b778f9, 0x528efc17, + 0x83c06e1f, 0x22f9eaf1, 0x1ac26182, 0xbbfbe56c, 0x632e43d3, + 0xc217c73d, 0xfa2c4c4e, 0x5b15c8a0, 0x8a5b5aa8, 0x2b62de46, + 0x13595535, 0xb260d1db, 0x79831e0a, 0xd8ba9ae4, 0xe0811197, + 0x41b89579, 0x90f60771, 0x31cf839f, 0x09f408ec, 0xa8cd8c02, + 0x70182abd, 0xd121ae53, 0xe91a2520, 0x4823a1ce, 0x996d33c6, + 0x3854b728, 0x006f3c5b, 0xa156b8b5, 0x99b34b70, 0x388acf9e, + 0x00b144ed, 0xa188c003, 0x70c6520b, 0xd1ffd6e5, 0xe9c45d96, + 0x48fdd978, 0x90287fc7, 0x3111fb29, 0x092a705a, 0xa813f4b4, + 0x795d66bc, 0xd864e252, 0xe05f6921, 0x4166edcf, 0x8a85221e, + 0x2bbca6f0, 0x13872d83, 0xb2bea96d, 0x63f03b65, 0xc2c9bf8b, + 0xfaf234f8, 0x5bcbb016, 0x831e16a9, 0x22279247, 0x1a1c1934, + 0xbb259dda, 0x6a6b0fd2, 0xcb528b3c, 0xf369004f, 0x525084a1, + 0xbfdf99ac, 0x1ee61d42, 0x26dd9631, 0x87e412df, 0x56aa80d7, + 0xf7930439, 0xcfa88f4a, 0x6e910ba4, 0xb644ad1b, 0x177d29f5, + 0x2f46a286, 0x8e7f2668, 0x5f31b460, 0xfe08308e, 0xc633bbfd, + 0x670a3f13, 0xace9f0c2, 0x0dd0742c, 0x35ebff5f, 0x94d27bb1, + 0x459ce9b9, 0xe4a56d57, 0xdc9ee624, 0x7da762ca, 0xa572c475, + 0x044b409b, 0x3c70cbe8, 0x9d494f06, 0x4c07dd0e, 0xed3e59e0, + 0xd505d293, 0x743c567d, 0xd56aeec8, 0x74536a26, 0x4c68e155, + 0xed5165bb, 0x3c1ff7b3, 0x9d26735d, 0xa51df82e, 0x04247cc0, + 0xdcf1da7f, 0x7dc85e91, 0x45f3d5e2, 0xe4ca510c, 0x3584c304, + 0x94bd47ea, 0xac86cc99, 0x0dbf4877, 0xc65c87a6, 0x67650348, + 0x5f5e883b, 0xfe670cd5, 0x2f299edd, 0x8e101a33, 0xb62b9140, + 0x171215ae, 0xcfc7b311, 0x6efe37ff, 0x56c5bc8c, 0xf7fc3862, + 0x26b2aa6a, 0x878b2e84, 0xbfb0a5f7, 0x1e892119, 0xf3063c14, + 0x523fb8fa, 0x6a043389, 0xcb3db767, 0x1a73256f, 0xbb4aa181, + 0x83712af2, 0x2248ae1c, 0xfa9d08a3, 0x5ba48c4d, 0x639f073e, + 0xc2a683d0, 0x13e811d8, 0xb2d19536, 0x8aea1e45, 0x2bd39aab, + 0xe030557a, 0x4109d194, 0x79325ae7, 0xd80bde09, 0x09454c01, + 0xa87cc8ef, 0x9047439c, 0x317ec772, 0xe9ab61cd, 0x4892e523, + 0x70a96e50, 0xd190eabe, 0x00de78b6, 0xa1e7fc58, 0x99dc772b, + 0x38e5f3c5}, + {0x00000000, 0xe81790a1, 0x0b5e2703, 0xe349b7a2, 0x16bc4e06, + 0xfeabdea7, 0x1de26905, 0xf5f5f9a4, 0x2d789c0c, 0xc56f0cad, + 0x2626bb0f, 0xce312bae, 0x3bc4d20a, 0xd3d342ab, 0x309af509, + 0xd88d65a8, 0x5af13818, 0xb2e6a8b9, 0x51af1f1b, 0xb9b88fba, + 0x4c4d761e, 0xa45ae6bf, 0x4713511d, 
0xaf04c1bc, 0x7789a414, + 0x9f9e34b5, 0x7cd78317, 0x94c013b6, 0x6135ea12, 0x89227ab3, + 0x6a6bcd11, 0x827c5db0, 0xb5e27030, 0x5df5e091, 0xbebc5733, + 0x56abc792, 0xa35e3e36, 0x4b49ae97, 0xa8001935, 0x40178994, + 0x989aec3c, 0x708d7c9d, 0x93c4cb3f, 0x7bd35b9e, 0x8e26a23a, + 0x6631329b, 0x85788539, 0x6d6f1598, 0xef134828, 0x0704d889, + 0xe44d6f2b, 0x0c5aff8a, 0xf9af062e, 0x11b8968f, 0xf2f1212d, + 0x1ae6b18c, 0xc26bd424, 0x2a7c4485, 0xc935f327, 0x21226386, + 0xd4d79a22, 0x3cc00a83, 0xdf89bd21, 0x379e2d80, 0xb0b5e621, + 0x58a27680, 0xbbebc122, 0x53fc5183, 0xa609a827, 0x4e1e3886, + 0xad578f24, 0x45401f85, 0x9dcd7a2d, 0x75daea8c, 0x96935d2e, + 0x7e84cd8f, 0x8b71342b, 0x6366a48a, 0x802f1328, 0x68388389, + 0xea44de39, 0x02534e98, 0xe11af93a, 0x090d699b, 0xfcf8903f, + 0x14ef009e, 0xf7a6b73c, 0x1fb1279d, 0xc73c4235, 0x2f2bd294, + 0xcc626536, 0x2475f597, 0xd1800c33, 0x39979c92, 0xdade2b30, + 0x32c9bb91, 0x05579611, 0xed4006b0, 0x0e09b112, 0xe61e21b3, + 0x13ebd817, 0xfbfc48b6, 0x18b5ff14, 0xf0a26fb5, 0x282f0a1d, + 0xc0389abc, 0x23712d1e, 0xcb66bdbf, 0x3e93441b, 0xd684d4ba, + 0x35cd6318, 0xdddaf3b9, 0x5fa6ae09, 0xb7b13ea8, 0x54f8890a, + 0xbcef19ab, 0x491ae00f, 0xa10d70ae, 0x4244c70c, 0xaa5357ad, + 0x72de3205, 0x9ac9a2a4, 0x79801506, 0x919785a7, 0x64627c03, + 0x8c75eca2, 0x6f3c5b00, 0x872bcba1, 0xba1aca03, 0x520d5aa2, + 0xb144ed00, 0x59537da1, 0xaca68405, 0x44b114a4, 0xa7f8a306, + 0x4fef33a7, 0x9762560f, 0x7f75c6ae, 0x9c3c710c, 0x742be1ad, + 0x81de1809, 0x69c988a8, 0x8a803f0a, 0x6297afab, 0xe0ebf21b, + 0x08fc62ba, 0xebb5d518, 0x03a245b9, 0xf657bc1d, 0x1e402cbc, + 0xfd099b1e, 0x151e0bbf, 0xcd936e17, 0x2584feb6, 0xc6cd4914, + 0x2edad9b5, 0xdb2f2011, 0x3338b0b0, 0xd0710712, 0x386697b3, + 0x0ff8ba33, 0xe7ef2a92, 0x04a69d30, 0xecb10d91, 0x1944f435, + 0xf1536494, 0x121ad336, 0xfa0d4397, 0x2280263f, 0xca97b69e, + 0x29de013c, 0xc1c9919d, 0x343c6839, 0xdc2bf898, 0x3f624f3a, + 0xd775df9b, 0x5509822b, 0xbd1e128a, 0x5e57a528, 0xb6403589, + 0x43b5cc2d, 0xaba25c8c, 0x48ebeb2e, 0xa0fc7b8f, 0x78711e27, + 0x90668e86, 0x732f3924, 0x9b38a985, 0x6ecd5021, 0x86dac080, + 0x65937722, 0x8d84e783, 0x0aaf2c22, 0xe2b8bc83, 0x01f10b21, + 0xe9e69b80, 0x1c136224, 0xf404f285, 0x174d4527, 0xff5ad586, + 0x27d7b02e, 0xcfc0208f, 0x2c89972d, 0xc49e078c, 0x316bfe28, + 0xd97c6e89, 0x3a35d92b, 0xd222498a, 0x505e143a, 0xb849849b, + 0x5b003339, 0xb317a398, 0x46e25a3c, 0xaef5ca9d, 0x4dbc7d3f, + 0xa5abed9e, 0x7d268836, 0x95311897, 0x7678af35, 0x9e6f3f94, + 0x6b9ac630, 0x838d5691, 0x60c4e133, 0x88d37192, 0xbf4d5c12, + 0x575accb3, 0xb4137b11, 0x5c04ebb0, 0xa9f11214, 0x41e682b5, + 0xa2af3517, 0x4ab8a5b6, 0x9235c01e, 0x7a2250bf, 0x996be71d, + 0x717c77bc, 0x84898e18, 0x6c9e1eb9, 0x8fd7a91b, 0x67c039ba, + 0xe5bc640a, 0x0dabf4ab, 0xeee24309, 0x06f5d3a8, 0xf3002a0c, + 0x1b17baad, 0xf85e0d0f, 0x10499dae, 0xc8c4f806, 0x20d368a7, + 0xc39adf05, 0x2b8d4fa4, 0xde78b600, 0x366f26a1, 0xd5269103, + 0x3d3101a2}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x0000000000000000, 0xa19017e800000000, 0x03275e0b00000000, + 0xa2b749e300000000, 0x064ebc1600000000, 0xa7deabfe00000000, + 0x0569e21d00000000, 0xa4f9f5f500000000, 0x0c9c782d00000000, + 0xad0c6fc500000000, 0x0fbb262600000000, 0xae2b31ce00000000, + 0x0ad2c43b00000000, 0xab42d3d300000000, 0x09f59a3000000000, + 0xa8658dd800000000, 0x1838f15a00000000, 0xb9a8e6b200000000, + 0x1b1faf5100000000, 0xba8fb8b900000000, 0x1e764d4c00000000, + 0xbfe65aa400000000, 0x1d51134700000000, 0xbcc104af00000000, + 0x14a4897700000000, 0xb5349e9f00000000, 0x1783d77c00000000, + 0xb613c09400000000, 0x12ea356100000000, 
0xb37a228900000000, + 0x11cd6b6a00000000, 0xb05d7c8200000000, 0x3070e2b500000000, + 0x91e0f55d00000000, 0x3357bcbe00000000, 0x92c7ab5600000000, + 0x363e5ea300000000, 0x97ae494b00000000, 0x351900a800000000, + 0x9489174000000000, 0x3cec9a9800000000, 0x9d7c8d7000000000, + 0x3fcbc49300000000, 0x9e5bd37b00000000, 0x3aa2268e00000000, + 0x9b32316600000000, 0x3985788500000000, 0x98156f6d00000000, + 0x284813ef00000000, 0x89d8040700000000, 0x2b6f4de400000000, + 0x8aff5a0c00000000, 0x2e06aff900000000, 0x8f96b81100000000, + 0x2d21f1f200000000, 0x8cb1e61a00000000, 0x24d46bc200000000, + 0x85447c2a00000000, 0x27f335c900000000, 0x8663222100000000, + 0x229ad7d400000000, 0x830ac03c00000000, 0x21bd89df00000000, + 0x802d9e3700000000, 0x21e6b5b000000000, 0x8076a25800000000, + 0x22c1ebbb00000000, 0x8351fc5300000000, 0x27a809a600000000, + 0x86381e4e00000000, 0x248f57ad00000000, 0x851f404500000000, + 0x2d7acd9d00000000, 0x8ceada7500000000, 0x2e5d939600000000, + 0x8fcd847e00000000, 0x2b34718b00000000, 0x8aa4666300000000, + 0x28132f8000000000, 0x8983386800000000, 0x39de44ea00000000, + 0x984e530200000000, 0x3af91ae100000000, 0x9b690d0900000000, + 0x3f90f8fc00000000, 0x9e00ef1400000000, 0x3cb7a6f700000000, + 0x9d27b11f00000000, 0x35423cc700000000, 0x94d22b2f00000000, + 0x366562cc00000000, 0x97f5752400000000, 0x330c80d100000000, + 0x929c973900000000, 0x302bdeda00000000, 0x91bbc93200000000, + 0x1196570500000000, 0xb00640ed00000000, 0x12b1090e00000000, + 0xb3211ee600000000, 0x17d8eb1300000000, 0xb648fcfb00000000, + 0x14ffb51800000000, 0xb56fa2f000000000, 0x1d0a2f2800000000, + 0xbc9a38c000000000, 0x1e2d712300000000, 0xbfbd66cb00000000, + 0x1b44933e00000000, 0xbad484d600000000, 0x1863cd3500000000, + 0xb9f3dadd00000000, 0x09aea65f00000000, 0xa83eb1b700000000, + 0x0a89f85400000000, 0xab19efbc00000000, 0x0fe01a4900000000, + 0xae700da100000000, 0x0cc7444200000000, 0xad5753aa00000000, + 0x0532de7200000000, 0xa4a2c99a00000000, 0x0615807900000000, + 0xa785979100000000, 0x037c626400000000, 0xa2ec758c00000000, + 0x005b3c6f00000000, 0xa1cb2b8700000000, 0x03ca1aba00000000, + 0xa25a0d5200000000, 0x00ed44b100000000, 0xa17d535900000000, + 0x0584a6ac00000000, 0xa414b14400000000, 0x06a3f8a700000000, + 0xa733ef4f00000000, 0x0f56629700000000, 0xaec6757f00000000, + 0x0c713c9c00000000, 0xade12b7400000000, 0x0918de8100000000, + 0xa888c96900000000, 0x0a3f808a00000000, 0xabaf976200000000, + 0x1bf2ebe000000000, 0xba62fc0800000000, 0x18d5b5eb00000000, + 0xb945a20300000000, 0x1dbc57f600000000, 0xbc2c401e00000000, + 0x1e9b09fd00000000, 0xbf0b1e1500000000, 0x176e93cd00000000, + 0xb6fe842500000000, 0x1449cdc600000000, 0xb5d9da2e00000000, + 0x11202fdb00000000, 0xb0b0383300000000, 0x120771d000000000, + 0xb397663800000000, 0x33baf80f00000000, 0x922aefe700000000, + 0x309da60400000000, 0x910db1ec00000000, 0x35f4441900000000, + 0x946453f100000000, 0x36d31a1200000000, 0x97430dfa00000000, + 0x3f26802200000000, 0x9eb697ca00000000, 0x3c01de2900000000, + 0x9d91c9c100000000, 0x39683c3400000000, 0x98f82bdc00000000, + 0x3a4f623f00000000, 0x9bdf75d700000000, 0x2b82095500000000, + 0x8a121ebd00000000, 0x28a5575e00000000, 0x893540b600000000, + 0x2dccb54300000000, 0x8c5ca2ab00000000, 0x2eebeb4800000000, + 0x8f7bfca000000000, 0x271e717800000000, 0x868e669000000000, + 0x24392f7300000000, 0x85a9389b00000000, 0x2150cd6e00000000, + 0x80c0da8600000000, 0x2277936500000000, 0x83e7848d00000000, + 0x222caf0a00000000, 0x83bcb8e200000000, 0x210bf10100000000, + 0x809be6e900000000, 0x2462131c00000000, 0x85f204f400000000, + 0x27454d1700000000, 0x86d55aff00000000, 0x2eb0d72700000000, 
+ 0x8f20c0cf00000000, 0x2d97892c00000000, 0x8c079ec400000000, + 0x28fe6b3100000000, 0x896e7cd900000000, 0x2bd9353a00000000, + 0x8a4922d200000000, 0x3a145e5000000000, 0x9b8449b800000000, + 0x3933005b00000000, 0x98a317b300000000, 0x3c5ae24600000000, + 0x9dcaf5ae00000000, 0x3f7dbc4d00000000, 0x9eedaba500000000, + 0x3688267d00000000, 0x9718319500000000, 0x35af787600000000, + 0x943f6f9e00000000, 0x30c69a6b00000000, 0x91568d8300000000, + 0x33e1c46000000000, 0x9271d38800000000, 0x125c4dbf00000000, + 0xb3cc5a5700000000, 0x117b13b400000000, 0xb0eb045c00000000, + 0x1412f1a900000000, 0xb582e64100000000, 0x1735afa200000000, + 0xb6a5b84a00000000, 0x1ec0359200000000, 0xbf50227a00000000, + 0x1de76b9900000000, 0xbc777c7100000000, 0x188e898400000000, + 0xb91e9e6c00000000, 0x1ba9d78f00000000, 0xba39c06700000000, + 0x0a64bce500000000, 0xabf4ab0d00000000, 0x0943e2ee00000000, + 0xa8d3f50600000000, 0x0c2a00f300000000, 0xadba171b00000000, + 0x0f0d5ef800000000, 0xae9d491000000000, 0x06f8c4c800000000, + 0xa768d32000000000, 0x05df9ac300000000, 0xa44f8d2b00000000, + 0x00b678de00000000, 0xa1266f3600000000, 0x039126d500000000, + 0xa201313d00000000}, + {0x0000000000000000, 0xee8439a100000000, 0x9d0f029900000000, + 0x738b3b3800000000, 0x7b1975e900000000, 0x959d4c4800000000, + 0xe616777000000000, 0x08924ed100000000, 0xb7349b0900000000, + 0x59b0a2a800000000, 0x2a3b999000000000, 0xc4bfa03100000000, + 0xcc2deee000000000, 0x22a9d74100000000, 0x5122ec7900000000, + 0xbfa6d5d800000000, 0x6e69361300000000, 0x80ed0fb200000000, + 0xf366348a00000000, 0x1de20d2b00000000, 0x157043fa00000000, + 0xfbf47a5b00000000, 0x887f416300000000, 0x66fb78c200000000, + 0xd95dad1a00000000, 0x37d994bb00000000, 0x4452af8300000000, + 0xaad6962200000000, 0xa244d8f300000000, 0x4cc0e15200000000, + 0x3f4bda6a00000000, 0xd1cfe3cb00000000, 0xdcd26c2600000000, + 0x3256558700000000, 0x41dd6ebf00000000, 0xaf59571e00000000, + 0xa7cb19cf00000000, 0x494f206e00000000, 0x3ac41b5600000000, + 0xd44022f700000000, 0x6be6f72f00000000, 0x8562ce8e00000000, + 0xf6e9f5b600000000, 0x186dcc1700000000, 0x10ff82c600000000, + 0xfe7bbb6700000000, 0x8df0805f00000000, 0x6374b9fe00000000, + 0xb2bb5a3500000000, 0x5c3f639400000000, 0x2fb458ac00000000, + 0xc130610d00000000, 0xc9a22fdc00000000, 0x2726167d00000000, + 0x54ad2d4500000000, 0xba2914e400000000, 0x058fc13c00000000, + 0xeb0bf89d00000000, 0x9880c3a500000000, 0x7604fa0400000000, + 0x7e96b4d500000000, 0x90128d7400000000, 0xe399b64c00000000, + 0x0d1d8fed00000000, 0xb8a5d94c00000000, 0x5621e0ed00000000, + 0x25aadbd500000000, 0xcb2ee27400000000, 0xc3bcaca500000000, + 0x2d38950400000000, 0x5eb3ae3c00000000, 0xb037979d00000000, + 0x0f91424500000000, 0xe1157be400000000, 0x929e40dc00000000, + 0x7c1a797d00000000, 0x748837ac00000000, 0x9a0c0e0d00000000, + 0xe987353500000000, 0x07030c9400000000, 0xd6ccef5f00000000, + 0x3848d6fe00000000, 0x4bc3edc600000000, 0xa547d46700000000, + 0xadd59ab600000000, 0x4351a31700000000, 0x30da982f00000000, + 0xde5ea18e00000000, 0x61f8745600000000, 0x8f7c4df700000000, + 0xfcf776cf00000000, 0x12734f6e00000000, 0x1ae101bf00000000, + 0xf465381e00000000, 0x87ee032600000000, 0x696a3a8700000000, + 0x6477b56a00000000, 0x8af38ccb00000000, 0xf978b7f300000000, + 0x17fc8e5200000000, 0x1f6ec08300000000, 0xf1eaf92200000000, + 0x8261c21a00000000, 0x6ce5fbbb00000000, 0xd3432e6300000000, + 0x3dc717c200000000, 0x4e4c2cfa00000000, 0xa0c8155b00000000, + 0xa85a5b8a00000000, 0x46de622b00000000, 0x3555591300000000, + 0xdbd160b200000000, 0x0a1e837900000000, 0xe49abad800000000, + 0x971181e000000000, 0x7995b84100000000, 
0x7107f69000000000, + 0x9f83cf3100000000, 0xec08f40900000000, 0x028ccda800000000, + 0xbd2a187000000000, 0x53ae21d100000000, 0x20251ae900000000, + 0xcea1234800000000, 0xc6336d9900000000, 0x28b7543800000000, + 0x5b3c6f0000000000, 0xb5b856a100000000, 0x704bb39900000000, + 0x9ecf8a3800000000, 0xed44b10000000000, 0x03c088a100000000, + 0x0b52c67000000000, 0xe5d6ffd100000000, 0x965dc4e900000000, + 0x78d9fd4800000000, 0xc77f289000000000, 0x29fb113100000000, + 0x5a702a0900000000, 0xb4f413a800000000, 0xbc665d7900000000, + 0x52e264d800000000, 0x21695fe000000000, 0xcfed664100000000, + 0x1e22858a00000000, 0xf0a6bc2b00000000, 0x832d871300000000, + 0x6da9beb200000000, 0x653bf06300000000, 0x8bbfc9c200000000, + 0xf834f2fa00000000, 0x16b0cb5b00000000, 0xa9161e8300000000, + 0x4792272200000000, 0x34191c1a00000000, 0xda9d25bb00000000, + 0xd20f6b6a00000000, 0x3c8b52cb00000000, 0x4f0069f300000000, + 0xa184505200000000, 0xac99dfbf00000000, 0x421de61e00000000, + 0x3196dd2600000000, 0xdf12e48700000000, 0xd780aa5600000000, + 0x390493f700000000, 0x4a8fa8cf00000000, 0xa40b916e00000000, + 0x1bad44b600000000, 0xf5297d1700000000, 0x86a2462f00000000, + 0x68267f8e00000000, 0x60b4315f00000000, 0x8e3008fe00000000, + 0xfdbb33c600000000, 0x133f0a6700000000, 0xc2f0e9ac00000000, + 0x2c74d00d00000000, 0x5fffeb3500000000, 0xb17bd29400000000, + 0xb9e99c4500000000, 0x576da5e400000000, 0x24e69edc00000000, + 0xca62a77d00000000, 0x75c472a500000000, 0x9b404b0400000000, + 0xe8cb703c00000000, 0x064f499d00000000, 0x0edd074c00000000, + 0xe0593eed00000000, 0x93d205d500000000, 0x7d563c7400000000, + 0xc8ee6ad500000000, 0x266a537400000000, 0x55e1684c00000000, + 0xbb6551ed00000000, 0xb3f71f3c00000000, 0x5d73269d00000000, + 0x2ef81da500000000, 0xc07c240400000000, 0x7fdaf1dc00000000, + 0x915ec87d00000000, 0xe2d5f34500000000, 0x0c51cae400000000, + 0x04c3843500000000, 0xea47bd9400000000, 0x99cc86ac00000000, + 0x7748bf0d00000000, 0xa6875cc600000000, 0x4803656700000000, + 0x3b885e5f00000000, 0xd50c67fe00000000, 0xdd9e292f00000000, + 0x331a108e00000000, 0x40912bb600000000, 0xae15121700000000, + 0x11b3c7cf00000000, 0xff37fe6e00000000, 0x8cbcc55600000000, + 0x6238fcf700000000, 0x6aaab22600000000, 0x842e8b8700000000, + 0xf7a5b0bf00000000, 0x1921891e00000000, 0x143c06f300000000, + 0xfab83f5200000000, 0x8933046a00000000, 0x67b73dcb00000000, + 0x6f25731a00000000, 0x81a14abb00000000, 0xf22a718300000000, + 0x1cae482200000000, 0xa3089dfa00000000, 0x4d8ca45b00000000, + 0x3e079f6300000000, 0xd083a6c200000000, 0xd811e81300000000, + 0x3695d1b200000000, 0x451eea8a00000000, 0xab9ad32b00000000, + 0x7a5530e000000000, 0x94d1094100000000, 0xe75a327900000000, + 0x09de0bd800000000, 0x014c450900000000, 0xefc87ca800000000, + 0x9c43479000000000, 0x72c77e3100000000, 0xcd61abe900000000, + 0x23e5924800000000, 0x506ea97000000000, 0xbeea90d100000000, + 0xb678de0000000000, 0x58fce7a100000000, 0x2b77dc9900000000, + 0xc5f3e53800000000}, + {0x0000000000000000, 0xfbf6134700000000, 0xf6ed278e00000000, + 0x0d1b34c900000000, 0xaddd3ec700000000, 0x562b2d8000000000, + 0x5b30194900000000, 0xa0c60a0e00000000, 0x1bbd0c5500000000, + 0xe04b1f1200000000, 0xed502bdb00000000, 0x16a6389c00000000, + 0xb660329200000000, 0x4d9621d500000000, 0x408d151c00000000, + 0xbb7b065b00000000, 0x367a19aa00000000, 0xcd8c0aed00000000, + 0xc0973e2400000000, 0x3b612d6300000000, 0x9ba7276d00000000, + 0x6051342a00000000, 0x6d4a00e300000000, 0x96bc13a400000000, + 0x2dc715ff00000000, 0xd63106b800000000, 0xdb2a327100000000, + 0x20dc213600000000, 0x801a2b3800000000, 0x7bec387f00000000, + 0x76f70cb600000000, 
0x8d011ff100000000, 0x2df2438f00000000, + 0xd60450c800000000, 0xdb1f640100000000, 0x20e9774600000000, + 0x802f7d4800000000, 0x7bd96e0f00000000, 0x76c25ac600000000, + 0x8d34498100000000, 0x364f4fda00000000, 0xcdb95c9d00000000, + 0xc0a2685400000000, 0x3b547b1300000000, 0x9b92711d00000000, + 0x6064625a00000000, 0x6d7f569300000000, 0x968945d400000000, + 0x1b885a2500000000, 0xe07e496200000000, 0xed657dab00000000, + 0x16936eec00000000, 0xb65564e200000000, 0x4da377a500000000, + 0x40b8436c00000000, 0xbb4e502b00000000, 0x0035567000000000, + 0xfbc3453700000000, 0xf6d871fe00000000, 0x0d2e62b900000000, + 0xade868b700000000, 0x561e7bf000000000, 0x5b054f3900000000, + 0xa0f35c7e00000000, 0x1be2f6c500000000, 0xe014e58200000000, + 0xed0fd14b00000000, 0x16f9c20c00000000, 0xb63fc80200000000, + 0x4dc9db4500000000, 0x40d2ef8c00000000, 0xbb24fccb00000000, + 0x005ffa9000000000, 0xfba9e9d700000000, 0xf6b2dd1e00000000, + 0x0d44ce5900000000, 0xad82c45700000000, 0x5674d71000000000, + 0x5b6fe3d900000000, 0xa099f09e00000000, 0x2d98ef6f00000000, + 0xd66efc2800000000, 0xdb75c8e100000000, 0x2083dba600000000, + 0x8045d1a800000000, 0x7bb3c2ef00000000, 0x76a8f62600000000, + 0x8d5ee56100000000, 0x3625e33a00000000, 0xcdd3f07d00000000, + 0xc0c8c4b400000000, 0x3b3ed7f300000000, 0x9bf8ddfd00000000, + 0x600eceba00000000, 0x6d15fa7300000000, 0x96e3e93400000000, + 0x3610b54a00000000, 0xcde6a60d00000000, 0xc0fd92c400000000, + 0x3b0b818300000000, 0x9bcd8b8d00000000, 0x603b98ca00000000, + 0x6d20ac0300000000, 0x96d6bf4400000000, 0x2dadb91f00000000, + 0xd65baa5800000000, 0xdb409e9100000000, 0x20b68dd600000000, + 0x807087d800000000, 0x7b86949f00000000, 0x769da05600000000, + 0x8d6bb31100000000, 0x006aace000000000, 0xfb9cbfa700000000, + 0xf6878b6e00000000, 0x0d71982900000000, 0xadb7922700000000, + 0x5641816000000000, 0x5b5ab5a900000000, 0xa0aca6ee00000000, + 0x1bd7a0b500000000, 0xe021b3f200000000, 0xed3a873b00000000, + 0x16cc947c00000000, 0xb60a9e7200000000, 0x4dfc8d3500000000, + 0x40e7b9fc00000000, 0xbb11aabb00000000, 0x77c29c5000000000, + 0x8c348f1700000000, 0x812fbbde00000000, 0x7ad9a89900000000, + 0xda1fa29700000000, 0x21e9b1d000000000, 0x2cf2851900000000, + 0xd704965e00000000, 0x6c7f900500000000, 0x9789834200000000, + 0x9a92b78b00000000, 0x6164a4cc00000000, 0xc1a2aec200000000, + 0x3a54bd8500000000, 0x374f894c00000000, 0xccb99a0b00000000, + 0x41b885fa00000000, 0xba4e96bd00000000, 0xb755a27400000000, + 0x4ca3b13300000000, 0xec65bb3d00000000, 0x1793a87a00000000, + 0x1a889cb300000000, 0xe17e8ff400000000, 0x5a0589af00000000, + 0xa1f39ae800000000, 0xace8ae2100000000, 0x571ebd6600000000, + 0xf7d8b76800000000, 0x0c2ea42f00000000, 0x013590e600000000, + 0xfac383a100000000, 0x5a30dfdf00000000, 0xa1c6cc9800000000, + 0xacddf85100000000, 0x572beb1600000000, 0xf7ede11800000000, + 0x0c1bf25f00000000, 0x0100c69600000000, 0xfaf6d5d100000000, + 0x418dd38a00000000, 0xba7bc0cd00000000, 0xb760f40400000000, + 0x4c96e74300000000, 0xec50ed4d00000000, 0x17a6fe0a00000000, + 0x1abdcac300000000, 0xe14bd98400000000, 0x6c4ac67500000000, + 0x97bcd53200000000, 0x9aa7e1fb00000000, 0x6151f2bc00000000, + 0xc197f8b200000000, 0x3a61ebf500000000, 0x377adf3c00000000, + 0xcc8ccc7b00000000, 0x77f7ca2000000000, 0x8c01d96700000000, + 0x811aedae00000000, 0x7aecfee900000000, 0xda2af4e700000000, + 0x21dce7a000000000, 0x2cc7d36900000000, 0xd731c02e00000000, + 0x6c206a9500000000, 0x97d679d200000000, 0x9acd4d1b00000000, + 0x613b5e5c00000000, 0xc1fd545200000000, 0x3a0b471500000000, + 0x371073dc00000000, 0xcce6609b00000000, 0x779d66c000000000, + 0x8c6b758700000000, 0x8170414e00000000, 
0x7a86520900000000, + 0xda40580700000000, 0x21b64b4000000000, 0x2cad7f8900000000, + 0xd75b6cce00000000, 0x5a5a733f00000000, 0xa1ac607800000000, + 0xacb754b100000000, 0x574147f600000000, 0xf7874df800000000, + 0x0c715ebf00000000, 0x016a6a7600000000, 0xfa9c793100000000, + 0x41e77f6a00000000, 0xba116c2d00000000, 0xb70a58e400000000, + 0x4cfc4ba300000000, 0xec3a41ad00000000, 0x17cc52ea00000000, + 0x1ad7662300000000, 0xe121756400000000, 0x41d2291a00000000, + 0xba243a5d00000000, 0xb73f0e9400000000, 0x4cc91dd300000000, + 0xec0f17dd00000000, 0x17f9049a00000000, 0x1ae2305300000000, + 0xe114231400000000, 0x5a6f254f00000000, 0xa199360800000000, + 0xac8202c100000000, 0x5774118600000000, 0xf7b21b8800000000, + 0x0c4408cf00000000, 0x015f3c0600000000, 0xfaa92f4100000000, + 0x77a830b000000000, 0x8c5e23f700000000, 0x8145173e00000000, + 0x7ab3047900000000, 0xda750e7700000000, 0x21831d3000000000, + 0x2c9829f900000000, 0xd76e3abe00000000, 0x6c153ce500000000, + 0x97e32fa200000000, 0x9af81b6b00000000, 0x610e082c00000000, + 0xc1c8022200000000, 0x3a3e116500000000, 0x372525ac00000000, + 0xccd336eb00000000}, + {0x0000000000000000, 0x6238282a00000000, 0xc470505400000000, + 0xa648787e00000000, 0x88e1a0a800000000, 0xead9888200000000, + 0x4c91f0fc00000000, 0x2ea9d8d600000000, 0x51c5308a00000000, + 0x33fd18a000000000, 0x95b560de00000000, 0xf78d48f400000000, + 0xd924902200000000, 0xbb1cb80800000000, 0x1d54c07600000000, + 0x7f6ce85c00000000, 0xe38c10cf00000000, 0x81b438e500000000, + 0x27fc409b00000000, 0x45c468b100000000, 0x6b6db06700000000, + 0x0955984d00000000, 0xaf1de03300000000, 0xcd25c81900000000, + 0xb249204500000000, 0xd071086f00000000, 0x7639701100000000, + 0x1401583b00000000, 0x3aa880ed00000000, 0x5890a8c700000000, + 0xfed8d0b900000000, 0x9ce0f89300000000, 0x871f504500000000, + 0xe527786f00000000, 0x436f001100000000, 0x2157283b00000000, + 0x0ffef0ed00000000, 0x6dc6d8c700000000, 0xcb8ea0b900000000, + 0xa9b6889300000000, 0xd6da60cf00000000, 0xb4e248e500000000, + 0x12aa309b00000000, 0x709218b100000000, 0x5e3bc06700000000, + 0x3c03e84d00000000, 0x9a4b903300000000, 0xf873b81900000000, + 0x6493408a00000000, 0x06ab68a000000000, 0xa0e310de00000000, + 0xc2db38f400000000, 0xec72e02200000000, 0x8e4ac80800000000, + 0x2802b07600000000, 0x4a3a985c00000000, 0x3556700000000000, + 0x576e582a00000000, 0xf126205400000000, 0x931e087e00000000, + 0xbdb7d0a800000000, 0xdf8ff88200000000, 0x79c780fc00000000, + 0x1bffa8d600000000, 0x0e3fa08a00000000, 0x6c0788a000000000, + 0xca4ff0de00000000, 0xa877d8f400000000, 0x86de002200000000, + 0xe4e6280800000000, 0x42ae507600000000, 0x2096785c00000000, + 0x5ffa900000000000, 0x3dc2b82a00000000, 0x9b8ac05400000000, + 0xf9b2e87e00000000, 0xd71b30a800000000, 0xb523188200000000, + 0x136b60fc00000000, 0x715348d600000000, 0xedb3b04500000000, + 0x8f8b986f00000000, 0x29c3e01100000000, 0x4bfbc83b00000000, + 0x655210ed00000000, 0x076a38c700000000, 0xa12240b900000000, + 0xc31a689300000000, 0xbc7680cf00000000, 0xde4ea8e500000000, + 0x7806d09b00000000, 0x1a3ef8b100000000, 0x3497206700000000, + 0x56af084d00000000, 0xf0e7703300000000, 0x92df581900000000, + 0x8920f0cf00000000, 0xeb18d8e500000000, 0x4d50a09b00000000, + 0x2f6888b100000000, 0x01c1506700000000, 0x63f9784d00000000, + 0xc5b1003300000000, 0xa789281900000000, 0xd8e5c04500000000, + 0xbadde86f00000000, 0x1c95901100000000, 0x7eadb83b00000000, + 0x500460ed00000000, 0x323c48c700000000, 0x947430b900000000, + 0xf64c189300000000, 0x6aace00000000000, 0x0894c82a00000000, + 0xaedcb05400000000, 0xcce4987e00000000, 0xe24d40a800000000, + 0x8075688200000000, 
0x263d10fc00000000, 0x440538d600000000, + 0x3b69d08a00000000, 0x5951f8a000000000, 0xff1980de00000000, + 0x9d21a8f400000000, 0xb388702200000000, 0xd1b0580800000000, + 0x77f8207600000000, 0x15c0085c00000000, 0x5d7831ce00000000, + 0x3f4019e400000000, 0x9908619a00000000, 0xfb3049b000000000, + 0xd599916600000000, 0xb7a1b94c00000000, 0x11e9c13200000000, + 0x73d1e91800000000, 0x0cbd014400000000, 0x6e85296e00000000, + 0xc8cd511000000000, 0xaaf5793a00000000, 0x845ca1ec00000000, + 0xe66489c600000000, 0x402cf1b800000000, 0x2214d99200000000, + 0xbef4210100000000, 0xdccc092b00000000, 0x7a84715500000000, + 0x18bc597f00000000, 0x361581a900000000, 0x542da98300000000, + 0xf265d1fd00000000, 0x905df9d700000000, 0xef31118b00000000, + 0x8d0939a100000000, 0x2b4141df00000000, 0x497969f500000000, + 0x67d0b12300000000, 0x05e8990900000000, 0xa3a0e17700000000, + 0xc198c95d00000000, 0xda67618b00000000, 0xb85f49a100000000, + 0x1e1731df00000000, 0x7c2f19f500000000, 0x5286c12300000000, + 0x30bee90900000000, 0x96f6917700000000, 0xf4ceb95d00000000, + 0x8ba2510100000000, 0xe99a792b00000000, 0x4fd2015500000000, + 0x2dea297f00000000, 0x0343f1a900000000, 0x617bd98300000000, + 0xc733a1fd00000000, 0xa50b89d700000000, 0x39eb714400000000, + 0x5bd3596e00000000, 0xfd9b211000000000, 0x9fa3093a00000000, + 0xb10ad1ec00000000, 0xd332f9c600000000, 0x757a81b800000000, + 0x1742a99200000000, 0x682e41ce00000000, 0x0a1669e400000000, + 0xac5e119a00000000, 0xce6639b000000000, 0xe0cfe16600000000, + 0x82f7c94c00000000, 0x24bfb13200000000, 0x4687991800000000, + 0x5347914400000000, 0x317fb96e00000000, 0x9737c11000000000, + 0xf50fe93a00000000, 0xdba631ec00000000, 0xb99e19c600000000, + 0x1fd661b800000000, 0x7dee499200000000, 0x0282a1ce00000000, + 0x60ba89e400000000, 0xc6f2f19a00000000, 0xa4cad9b000000000, + 0x8a63016600000000, 0xe85b294c00000000, 0x4e13513200000000, + 0x2c2b791800000000, 0xb0cb818b00000000, 0xd2f3a9a100000000, + 0x74bbd1df00000000, 0x1683f9f500000000, 0x382a212300000000, + 0x5a12090900000000, 0xfc5a717700000000, 0x9e62595d00000000, + 0xe10eb10100000000, 0x8336992b00000000, 0x257ee15500000000, + 0x4746c97f00000000, 0x69ef11a900000000, 0x0bd7398300000000, + 0xad9f41fd00000000, 0xcfa769d700000000, 0xd458c10100000000, + 0xb660e92b00000000, 0x1028915500000000, 0x7210b97f00000000, + 0x5cb961a900000000, 0x3e81498300000000, 0x98c931fd00000000, + 0xfaf119d700000000, 0x859df18b00000000, 0xe7a5d9a100000000, + 0x41eda1df00000000, 0x23d589f500000000, 0x0d7c512300000000, + 0x6f44790900000000, 0xc90c017700000000, 0xab34295d00000000, + 0x37d4d1ce00000000, 0x55ecf9e400000000, 0xf3a4819a00000000, + 0x919ca9b000000000, 0xbf35716600000000, 0xdd0d594c00000000, + 0x7b45213200000000, 0x197d091800000000, 0x6611e14400000000, + 0x0429c96e00000000, 0xa261b11000000000, 0xc059993a00000000, + 0xeef041ec00000000, 0x8cc869c600000000, 0x2a8011b800000000, + 0x48b8399200000000}, + {0x0000000000000000, 0x4c2896a300000000, 0xd9565d9c00000000, + 0x957ecb3f00000000, 0xf3abcbe300000000, 0xbf835d4000000000, + 0x2afd967f00000000, 0x66d500dc00000000, 0xa751e61c00000000, + 0xeb7970bf00000000, 0x7e07bb8000000000, 0x322f2d2300000000, + 0x54fa2dff00000000, 0x18d2bb5c00000000, 0x8dac706300000000, + 0xc184e6c000000000, 0x4ea3cc3900000000, 0x028b5a9a00000000, + 0x97f591a500000000, 0xdbdd070600000000, 0xbd0807da00000000, + 0xf120917900000000, 0x645e5a4600000000, 0x2876cce500000000, + 0xe9f22a2500000000, 0xa5dabc8600000000, 0x30a477b900000000, + 0x7c8ce11a00000000, 0x1a59e1c600000000, 0x5671776500000000, + 0xc30fbc5a00000000, 0x8f272af900000000, 0x9c46997300000000, + 
0xd06e0fd000000000, 0x4510c4ef00000000, 0x0938524c00000000, + 0x6fed529000000000, 0x23c5c43300000000, 0xb6bb0f0c00000000, + 0xfa9399af00000000, 0x3b177f6f00000000, 0x773fe9cc00000000, + 0xe24122f300000000, 0xae69b45000000000, 0xc8bcb48c00000000, + 0x8494222f00000000, 0x11eae91000000000, 0x5dc27fb300000000, + 0xd2e5554a00000000, 0x9ecdc3e900000000, 0x0bb308d600000000, + 0x479b9e7500000000, 0x214e9ea900000000, 0x6d66080a00000000, + 0xf818c33500000000, 0xb430559600000000, 0x75b4b35600000000, + 0x399c25f500000000, 0xace2eeca00000000, 0xe0ca786900000000, + 0x861f78b500000000, 0xca37ee1600000000, 0x5f49252900000000, + 0x1361b38a00000000, 0x388d32e700000000, 0x74a5a44400000000, + 0xe1db6f7b00000000, 0xadf3f9d800000000, 0xcb26f90400000000, + 0x870e6fa700000000, 0x1270a49800000000, 0x5e58323b00000000, + 0x9fdcd4fb00000000, 0xd3f4425800000000, 0x468a896700000000, + 0x0aa21fc400000000, 0x6c771f1800000000, 0x205f89bb00000000, + 0xb521428400000000, 0xf909d42700000000, 0x762efede00000000, + 0x3a06687d00000000, 0xaf78a34200000000, 0xe35035e100000000, + 0x8585353d00000000, 0xc9ada39e00000000, 0x5cd368a100000000, + 0x10fbfe0200000000, 0xd17f18c200000000, 0x9d578e6100000000, + 0x0829455e00000000, 0x4401d3fd00000000, 0x22d4d32100000000, + 0x6efc458200000000, 0xfb828ebd00000000, 0xb7aa181e00000000, + 0xa4cbab9400000000, 0xe8e33d3700000000, 0x7d9df60800000000, + 0x31b560ab00000000, 0x5760607700000000, 0x1b48f6d400000000, + 0x8e363deb00000000, 0xc21eab4800000000, 0x039a4d8800000000, + 0x4fb2db2b00000000, 0xdacc101400000000, 0x96e486b700000000, + 0xf031866b00000000, 0xbc1910c800000000, 0x2967dbf700000000, + 0x654f4d5400000000, 0xea6867ad00000000, 0xa640f10e00000000, + 0x333e3a3100000000, 0x7f16ac9200000000, 0x19c3ac4e00000000, + 0x55eb3aed00000000, 0xc095f1d200000000, 0x8cbd677100000000, + 0x4d3981b100000000, 0x0111171200000000, 0x946fdc2d00000000, + 0xd8474a8e00000000, 0xbe924a5200000000, 0xf2badcf100000000, + 0x67c417ce00000000, 0x2bec816d00000000, 0x311c141500000000, + 0x7d3482b600000000, 0xe84a498900000000, 0xa462df2a00000000, + 0xc2b7dff600000000, 0x8e9f495500000000, 0x1be1826a00000000, + 0x57c914c900000000, 0x964df20900000000, 0xda6564aa00000000, + 0x4f1baf9500000000, 0x0333393600000000, 0x65e639ea00000000, + 0x29ceaf4900000000, 0xbcb0647600000000, 0xf098f2d500000000, + 0x7fbfd82c00000000, 0x33974e8f00000000, 0xa6e985b000000000, + 0xeac1131300000000, 0x8c1413cf00000000, 0xc03c856c00000000, + 0x55424e5300000000, 0x196ad8f000000000, 0xd8ee3e3000000000, + 0x94c6a89300000000, 0x01b863ac00000000, 0x4d90f50f00000000, + 0x2b45f5d300000000, 0x676d637000000000, 0xf213a84f00000000, + 0xbe3b3eec00000000, 0xad5a8d6600000000, 0xe1721bc500000000, + 0x740cd0fa00000000, 0x3824465900000000, 0x5ef1468500000000, + 0x12d9d02600000000, 0x87a71b1900000000, 0xcb8f8dba00000000, + 0x0a0b6b7a00000000, 0x4623fdd900000000, 0xd35d36e600000000, + 0x9f75a04500000000, 0xf9a0a09900000000, 0xb588363a00000000, + 0x20f6fd0500000000, 0x6cde6ba600000000, 0xe3f9415f00000000, + 0xafd1d7fc00000000, 0x3aaf1cc300000000, 0x76878a6000000000, + 0x10528abc00000000, 0x5c7a1c1f00000000, 0xc904d72000000000, + 0x852c418300000000, 0x44a8a74300000000, 0x088031e000000000, + 0x9dfefadf00000000, 0xd1d66c7c00000000, 0xb7036ca000000000, + 0xfb2bfa0300000000, 0x6e55313c00000000, 0x227da79f00000000, + 0x099126f200000000, 0x45b9b05100000000, 0xd0c77b6e00000000, + 0x9cefedcd00000000, 0xfa3aed1100000000, 0xb6127bb200000000, + 0x236cb08d00000000, 0x6f44262e00000000, 0xaec0c0ee00000000, + 0xe2e8564d00000000, 0x77969d7200000000, 0x3bbe0bd100000000, + 0x5d6b0b0d00000000, 
0x11439dae00000000, 0x843d569100000000, + 0xc815c03200000000, 0x4732eacb00000000, 0x0b1a7c6800000000, + 0x9e64b75700000000, 0xd24c21f400000000, 0xb499212800000000, + 0xf8b1b78b00000000, 0x6dcf7cb400000000, 0x21e7ea1700000000, + 0xe0630cd700000000, 0xac4b9a7400000000, 0x3935514b00000000, + 0x751dc7e800000000, 0x13c8c73400000000, 0x5fe0519700000000, + 0xca9e9aa800000000, 0x86b60c0b00000000, 0x95d7bf8100000000, + 0xd9ff292200000000, 0x4c81e21d00000000, 0x00a974be00000000, + 0x667c746200000000, 0x2a54e2c100000000, 0xbf2a29fe00000000, + 0xf302bf5d00000000, 0x3286599d00000000, 0x7eaecf3e00000000, + 0xebd0040100000000, 0xa7f892a200000000, 0xc12d927e00000000, + 0x8d0504dd00000000, 0x187bcfe200000000, 0x5453594100000000, + 0xdb7473b800000000, 0x975ce51b00000000, 0x02222e2400000000, + 0x4e0ab88700000000, 0x28dfb85b00000000, 0x64f72ef800000000, + 0xf189e5c700000000, 0xbda1736400000000, 0x7c2595a400000000, + 0x300d030700000000, 0xa573c83800000000, 0xe95b5e9b00000000, + 0x8f8e5e4700000000, 0xc3a6c8e400000000, 0x56d803db00000000, + 0x1af0957800000000}, + {0x0000000000000000, 0x939bc97f00000000, 0x263793ff00000000, + 0xb5ac5a8000000000, 0x0d68572400000000, 0x9ef39e5b00000000, + 0x2b5fc4db00000000, 0xb8c40da400000000, 0x1ad0ae4800000000, + 0x894b673700000000, 0x3ce73db700000000, 0xaf7cf4c800000000, + 0x17b8f96c00000000, 0x8423301300000000, 0x318f6a9300000000, + 0xa214a3ec00000000, 0x34a05d9100000000, 0xa73b94ee00000000, + 0x1297ce6e00000000, 0x810c071100000000, 0x39c80ab500000000, + 0xaa53c3ca00000000, 0x1fff994a00000000, 0x8c64503500000000, + 0x2e70f3d900000000, 0xbdeb3aa600000000, 0x0847602600000000, + 0x9bdca95900000000, 0x2318a4fd00000000, 0xb0836d8200000000, + 0x052f370200000000, 0x96b4fe7d00000000, 0x2946caf900000000, + 0xbadd038600000000, 0x0f71590600000000, 0x9cea907900000000, + 0x242e9ddd00000000, 0xb7b554a200000000, 0x02190e2200000000, + 0x9182c75d00000000, 0x339664b100000000, 0xa00dadce00000000, + 0x15a1f74e00000000, 0x863a3e3100000000, 0x3efe339500000000, + 0xad65faea00000000, 0x18c9a06a00000000, 0x8b52691500000000, + 0x1de6976800000000, 0x8e7d5e1700000000, 0x3bd1049700000000, + 0xa84acde800000000, 0x108ec04c00000000, 0x8315093300000000, + 0x36b953b300000000, 0xa5229acc00000000, 0x0736392000000000, + 0x94adf05f00000000, 0x2101aadf00000000, 0xb29a63a000000000, + 0x0a5e6e0400000000, 0x99c5a77b00000000, 0x2c69fdfb00000000, + 0xbff2348400000000, 0x138ae52800000000, 0x80112c5700000000, + 0x35bd76d700000000, 0xa626bfa800000000, 0x1ee2b20c00000000, + 0x8d797b7300000000, 0x38d521f300000000, 0xab4ee88c00000000, + 0x095a4b6000000000, 0x9ac1821f00000000, 0x2f6dd89f00000000, + 0xbcf611e000000000, 0x04321c4400000000, 0x97a9d53b00000000, + 0x22058fbb00000000, 0xb19e46c400000000, 0x272ab8b900000000, + 0xb4b171c600000000, 0x011d2b4600000000, 0x9286e23900000000, + 0x2a42ef9d00000000, 0xb9d926e200000000, 0x0c757c6200000000, + 0x9feeb51d00000000, 0x3dfa16f100000000, 0xae61df8e00000000, + 0x1bcd850e00000000, 0x88564c7100000000, 0x309241d500000000, + 0xa30988aa00000000, 0x16a5d22a00000000, 0x853e1b5500000000, + 0x3acc2fd100000000, 0xa957e6ae00000000, 0x1cfbbc2e00000000, + 0x8f60755100000000, 0x37a478f500000000, 0xa43fb18a00000000, + 0x1193eb0a00000000, 0x8208227500000000, 0x201c819900000000, + 0xb38748e600000000, 0x062b126600000000, 0x95b0db1900000000, + 0x2d74d6bd00000000, 0xbeef1fc200000000, 0x0b43454200000000, + 0x98d88c3d00000000, 0x0e6c724000000000, 0x9df7bb3f00000000, + 0x285be1bf00000000, 0xbbc028c000000000, 0x0304256400000000, + 0x909fec1b00000000, 0x2533b69b00000000, 0xb6a87fe400000000, + 
0x14bcdc0800000000, 0x8727157700000000, 0x328b4ff700000000, + 0xa110868800000000, 0x19d48b2c00000000, 0x8a4f425300000000, + 0x3fe318d300000000, 0xac78d1ac00000000, 0x2614cb5100000000, + 0xb58f022e00000000, 0x002358ae00000000, 0x93b891d100000000, + 0x2b7c9c7500000000, 0xb8e7550a00000000, 0x0d4b0f8a00000000, + 0x9ed0c6f500000000, 0x3cc4651900000000, 0xaf5fac6600000000, + 0x1af3f6e600000000, 0x89683f9900000000, 0x31ac323d00000000, + 0xa237fb4200000000, 0x179ba1c200000000, 0x840068bd00000000, + 0x12b496c000000000, 0x812f5fbf00000000, 0x3483053f00000000, + 0xa718cc4000000000, 0x1fdcc1e400000000, 0x8c47089b00000000, + 0x39eb521b00000000, 0xaa709b6400000000, 0x0864388800000000, + 0x9bfff1f700000000, 0x2e53ab7700000000, 0xbdc8620800000000, + 0x050c6fac00000000, 0x9697a6d300000000, 0x233bfc5300000000, + 0xb0a0352c00000000, 0x0f5201a800000000, 0x9cc9c8d700000000, + 0x2965925700000000, 0xbafe5b2800000000, 0x023a568c00000000, + 0x91a19ff300000000, 0x240dc57300000000, 0xb7960c0c00000000, + 0x1582afe000000000, 0x8619669f00000000, 0x33b53c1f00000000, + 0xa02ef56000000000, 0x18eaf8c400000000, 0x8b7131bb00000000, + 0x3edd6b3b00000000, 0xad46a24400000000, 0x3bf25c3900000000, + 0xa869954600000000, 0x1dc5cfc600000000, 0x8e5e06b900000000, + 0x369a0b1d00000000, 0xa501c26200000000, 0x10ad98e200000000, + 0x8336519d00000000, 0x2122f27100000000, 0xb2b93b0e00000000, + 0x0715618e00000000, 0x948ea8f100000000, 0x2c4aa55500000000, + 0xbfd16c2a00000000, 0x0a7d36aa00000000, 0x99e6ffd500000000, + 0x359e2e7900000000, 0xa605e70600000000, 0x13a9bd8600000000, + 0x803274f900000000, 0x38f6795d00000000, 0xab6db02200000000, + 0x1ec1eaa200000000, 0x8d5a23dd00000000, 0x2f4e803100000000, + 0xbcd5494e00000000, 0x097913ce00000000, 0x9ae2dab100000000, + 0x2226d71500000000, 0xb1bd1e6a00000000, 0x041144ea00000000, + 0x978a8d9500000000, 0x013e73e800000000, 0x92a5ba9700000000, + 0x2709e01700000000, 0xb492296800000000, 0x0c5624cc00000000, + 0x9fcdedb300000000, 0x2a61b73300000000, 0xb9fa7e4c00000000, + 0x1beedda000000000, 0x887514df00000000, 0x3dd94e5f00000000, + 0xae42872000000000, 0x16868a8400000000, 0x851d43fb00000000, + 0x30b1197b00000000, 0xa32ad00400000000, 0x1cd8e48000000000, + 0x8f432dff00000000, 0x3aef777f00000000, 0xa974be0000000000, + 0x11b0b3a400000000, 0x822b7adb00000000, 0x3787205b00000000, + 0xa41ce92400000000, 0x06084ac800000000, 0x959383b700000000, + 0x203fd93700000000, 0xb3a4104800000000, 0x0b601dec00000000, + 0x98fbd49300000000, 0x2d578e1300000000, 0xbecc476c00000000, + 0x2878b91100000000, 0xbbe3706e00000000, 0x0e4f2aee00000000, + 0x9dd4e39100000000, 0x2510ee3500000000, 0xb68b274a00000000, + 0x03277dca00000000, 0x90bcb4b500000000, 0x32a8175900000000, + 0xa133de2600000000, 0x149f84a600000000, 0x87044dd900000000, + 0x3fc0407d00000000, 0xac5b890200000000, 0x19f7d38200000000, + 0x8a6c1afd00000000}, + {0x0000000000000000, 0x650b796900000000, 0xca16f2d200000000, + 0xaf1d8bbb00000000, 0xd52b957e00000000, 0xb020ec1700000000, + 0x1f3d67ac00000000, 0x7a361ec500000000, 0xaa572afd00000000, + 0xcf5c539400000000, 0x6041d82f00000000, 0x054aa14600000000, + 0x7f7cbf8300000000, 0x1a77c6ea00000000, 0xb56a4d5100000000, + 0xd061343800000000, 0x15a9252100000000, 0x70a25c4800000000, + 0xdfbfd7f300000000, 0xbab4ae9a00000000, 0xc082b05f00000000, + 0xa589c93600000000, 0x0a94428d00000000, 0x6f9f3be400000000, + 0xbffe0fdc00000000, 0xdaf576b500000000, 0x75e8fd0e00000000, + 0x10e3846700000000, 0x6ad59aa200000000, 0x0fdee3cb00000000, + 0xa0c3687000000000, 0xc5c8111900000000, 0x2a524b4200000000, + 0x4f59322b00000000, 0xe044b99000000000, 
0x854fc0f900000000, + 0xff79de3c00000000, 0x9a72a75500000000, 0x356f2cee00000000, + 0x5064558700000000, 0x800561bf00000000, 0xe50e18d600000000, + 0x4a13936d00000000, 0x2f18ea0400000000, 0x552ef4c100000000, + 0x30258da800000000, 0x9f38061300000000, 0xfa337f7a00000000, + 0x3ffb6e6300000000, 0x5af0170a00000000, 0xf5ed9cb100000000, + 0x90e6e5d800000000, 0xead0fb1d00000000, 0x8fdb827400000000, + 0x20c609cf00000000, 0x45cd70a600000000, 0x95ac449e00000000, + 0xf0a73df700000000, 0x5fbab64c00000000, 0x3ab1cf2500000000, + 0x4087d1e000000000, 0x258ca88900000000, 0x8a91233200000000, + 0xef9a5a5b00000000, 0x54a4968400000000, 0x31afefed00000000, + 0x9eb2645600000000, 0xfbb91d3f00000000, 0x818f03fa00000000, + 0xe4847a9300000000, 0x4b99f12800000000, 0x2e92884100000000, + 0xfef3bc7900000000, 0x9bf8c51000000000, 0x34e54eab00000000, + 0x51ee37c200000000, 0x2bd8290700000000, 0x4ed3506e00000000, + 0xe1cedbd500000000, 0x84c5a2bc00000000, 0x410db3a500000000, + 0x2406cacc00000000, 0x8b1b417700000000, 0xee10381e00000000, + 0x942626db00000000, 0xf12d5fb200000000, 0x5e30d40900000000, + 0x3b3bad6000000000, 0xeb5a995800000000, 0x8e51e03100000000, + 0x214c6b8a00000000, 0x444712e300000000, 0x3e710c2600000000, + 0x5b7a754f00000000, 0xf467fef400000000, 0x916c879d00000000, + 0x7ef6ddc600000000, 0x1bfda4af00000000, 0xb4e02f1400000000, + 0xd1eb567d00000000, 0xabdd48b800000000, 0xced631d100000000, + 0x61cbba6a00000000, 0x04c0c30300000000, 0xd4a1f73b00000000, + 0xb1aa8e5200000000, 0x1eb705e900000000, 0x7bbc7c8000000000, + 0x018a624500000000, 0x64811b2c00000000, 0xcb9c909700000000, + 0xae97e9fe00000000, 0x6b5ff8e700000000, 0x0e54818e00000000, + 0xa1490a3500000000, 0xc442735c00000000, 0xbe746d9900000000, + 0xdb7f14f000000000, 0x74629f4b00000000, 0x1169e62200000000, + 0xc108d21a00000000, 0xa403ab7300000000, 0x0b1e20c800000000, + 0x6e1559a100000000, 0x1423476400000000, 0x71283e0d00000000, + 0xde35b5b600000000, 0xbb3eccdf00000000, 0xe94e5cd200000000, + 0x8c4525bb00000000, 0x2358ae0000000000, 0x4653d76900000000, + 0x3c65c9ac00000000, 0x596eb0c500000000, 0xf6733b7e00000000, + 0x9378421700000000, 0x4319762f00000000, 0x26120f4600000000, + 0x890f84fd00000000, 0xec04fd9400000000, 0x9632e35100000000, + 0xf3399a3800000000, 0x5c24118300000000, 0x392f68ea00000000, + 0xfce779f300000000, 0x99ec009a00000000, 0x36f18b2100000000, + 0x53faf24800000000, 0x29ccec8d00000000, 0x4cc795e400000000, + 0xe3da1e5f00000000, 0x86d1673600000000, 0x56b0530e00000000, + 0x33bb2a6700000000, 0x9ca6a1dc00000000, 0xf9add8b500000000, + 0x839bc67000000000, 0xe690bf1900000000, 0x498d34a200000000, + 0x2c864dcb00000000, 0xc31c179000000000, 0xa6176ef900000000, + 0x090ae54200000000, 0x6c019c2b00000000, 0x163782ee00000000, + 0x733cfb8700000000, 0xdc21703c00000000, 0xb92a095500000000, + 0x694b3d6d00000000, 0x0c40440400000000, 0xa35dcfbf00000000, + 0xc656b6d600000000, 0xbc60a81300000000, 0xd96bd17a00000000, + 0x76765ac100000000, 0x137d23a800000000, 0xd6b532b100000000, + 0xb3be4bd800000000, 0x1ca3c06300000000, 0x79a8b90a00000000, + 0x039ea7cf00000000, 0x6695dea600000000, 0xc988551d00000000, + 0xac832c7400000000, 0x7ce2184c00000000, 0x19e9612500000000, + 0xb6f4ea9e00000000, 0xd3ff93f700000000, 0xa9c98d3200000000, + 0xccc2f45b00000000, 0x63df7fe000000000, 0x06d4068900000000, + 0xbdeaca5600000000, 0xd8e1b33f00000000, 0x77fc388400000000, + 0x12f741ed00000000, 0x68c15f2800000000, 0x0dca264100000000, + 0xa2d7adfa00000000, 0xc7dcd49300000000, 0x17bde0ab00000000, + 0x72b699c200000000, 0xddab127900000000, 0xb8a06b1000000000, + 0xc29675d500000000, 0xa79d0cbc00000000, 0x0880870700000000, 
+ 0x6d8bfe6e00000000, 0xa843ef7700000000, 0xcd48961e00000000, + 0x62551da500000000, 0x075e64cc00000000, 0x7d687a0900000000, + 0x1863036000000000, 0xb77e88db00000000, 0xd275f1b200000000, + 0x0214c58a00000000, 0x671fbce300000000, 0xc802375800000000, + 0xad094e3100000000, 0xd73f50f400000000, 0xb234299d00000000, + 0x1d29a22600000000, 0x7822db4f00000000, 0x97b8811400000000, + 0xf2b3f87d00000000, 0x5dae73c600000000, 0x38a50aaf00000000, + 0x4293146a00000000, 0x27986d0300000000, 0x8885e6b800000000, + 0xed8e9fd100000000, 0x3defabe900000000, 0x58e4d28000000000, + 0xf7f9593b00000000, 0x92f2205200000000, 0xe8c43e9700000000, + 0x8dcf47fe00000000, 0x22d2cc4500000000, 0x47d9b52c00000000, + 0x8211a43500000000, 0xe71add5c00000000, 0x480756e700000000, + 0x2d0c2f8e00000000, 0x573a314b00000000, 0x3231482200000000, + 0x9d2cc39900000000, 0xf827baf000000000, 0x28468ec800000000, + 0x4d4df7a100000000, 0xe2507c1a00000000, 0x875b057300000000, + 0xfd6d1bb600000000, 0x986662df00000000, 0x377be96400000000, + 0x5270900d00000000}, + {0x0000000000000000, 0xdcecb13d00000000, 0xb8d9637b00000000, + 0x6435d24600000000, 0x70b3c7f600000000, 0xac5f76cb00000000, + 0xc86aa48d00000000, 0x148615b000000000, 0xa160fe3600000000, + 0x7d8c4f0b00000000, 0x19b99d4d00000000, 0xc5552c7000000000, + 0xd1d339c000000000, 0x0d3f88fd00000000, 0x690a5abb00000000, + 0xb5e6eb8600000000, 0x42c1fc6d00000000, 0x9e2d4d5000000000, + 0xfa189f1600000000, 0x26f42e2b00000000, 0x32723b9b00000000, + 0xee9e8aa600000000, 0x8aab58e000000000, 0x5647e9dd00000000, + 0xe3a1025b00000000, 0x3f4db36600000000, 0x5b78612000000000, + 0x8794d01d00000000, 0x9312c5ad00000000, 0x4ffe749000000000, + 0x2bcba6d600000000, 0xf72717eb00000000, 0x8482f9db00000000, + 0x586e48e600000000, 0x3c5b9aa000000000, 0xe0b72b9d00000000, + 0xf4313e2d00000000, 0x28dd8f1000000000, 0x4ce85d5600000000, + 0x9004ec6b00000000, 0x25e207ed00000000, 0xf90eb6d000000000, + 0x9d3b649600000000, 0x41d7d5ab00000000, 0x5551c01b00000000, + 0x89bd712600000000, 0xed88a36000000000, 0x3164125d00000000, + 0xc64305b600000000, 0x1aafb48b00000000, 0x7e9a66cd00000000, + 0xa276d7f000000000, 0xb6f0c24000000000, 0x6a1c737d00000000, + 0x0e29a13b00000000, 0xd2c5100600000000, 0x6723fb8000000000, + 0xbbcf4abd00000000, 0xdffa98fb00000000, 0x031629c600000000, + 0x17903c7600000000, 0xcb7c8d4b00000000, 0xaf495f0d00000000, + 0x73a5ee3000000000, 0x4903826c00000000, 0x95ef335100000000, + 0xf1dae11700000000, 0x2d36502a00000000, 0x39b0459a00000000, + 0xe55cf4a700000000, 0x816926e100000000, 0x5d8597dc00000000, + 0xe8637c5a00000000, 0x348fcd6700000000, 0x50ba1f2100000000, + 0x8c56ae1c00000000, 0x98d0bbac00000000, 0x443c0a9100000000, + 0x2009d8d700000000, 0xfce569ea00000000, 0x0bc27e0100000000, + 0xd72ecf3c00000000, 0xb31b1d7a00000000, 0x6ff7ac4700000000, + 0x7b71b9f700000000, 0xa79d08ca00000000, 0xc3a8da8c00000000, + 0x1f446bb100000000, 0xaaa2803700000000, 0x764e310a00000000, + 0x127be34c00000000, 0xce97527100000000, 0xda1147c100000000, + 0x06fdf6fc00000000, 0x62c824ba00000000, 0xbe24958700000000, + 0xcd817bb700000000, 0x116dca8a00000000, 0x755818cc00000000, + 0xa9b4a9f100000000, 0xbd32bc4100000000, 0x61de0d7c00000000, + 0x05ebdf3a00000000, 0xd9076e0700000000, 0x6ce1858100000000, + 0xb00d34bc00000000, 0xd438e6fa00000000, 0x08d457c700000000, + 0x1c52427700000000, 0xc0bef34a00000000, 0xa48b210c00000000, + 0x7867903100000000, 0x8f4087da00000000, 0x53ac36e700000000, + 0x3799e4a100000000, 0xeb75559c00000000, 0xfff3402c00000000, + 0x231ff11100000000, 0x472a235700000000, 0x9bc6926a00000000, + 0x2e2079ec00000000, 0xf2ccc8d100000000, 
0x96f91a9700000000, + 0x4a15abaa00000000, 0x5e93be1a00000000, 0x827f0f2700000000, + 0xe64add6100000000, 0x3aa66c5c00000000, 0x920604d900000000, + 0x4eeab5e400000000, 0x2adf67a200000000, 0xf633d69f00000000, + 0xe2b5c32f00000000, 0x3e59721200000000, 0x5a6ca05400000000, + 0x8680116900000000, 0x3366faef00000000, 0xef8a4bd200000000, + 0x8bbf999400000000, 0x575328a900000000, 0x43d53d1900000000, + 0x9f398c2400000000, 0xfb0c5e6200000000, 0x27e0ef5f00000000, + 0xd0c7f8b400000000, 0x0c2b498900000000, 0x681e9bcf00000000, + 0xb4f22af200000000, 0xa0743f4200000000, 0x7c988e7f00000000, + 0x18ad5c3900000000, 0xc441ed0400000000, 0x71a7068200000000, + 0xad4bb7bf00000000, 0xc97e65f900000000, 0x1592d4c400000000, + 0x0114c17400000000, 0xddf8704900000000, 0xb9cda20f00000000, + 0x6521133200000000, 0x1684fd0200000000, 0xca684c3f00000000, + 0xae5d9e7900000000, 0x72b12f4400000000, 0x66373af400000000, + 0xbadb8bc900000000, 0xdeee598f00000000, 0x0202e8b200000000, + 0xb7e4033400000000, 0x6b08b20900000000, 0x0f3d604f00000000, + 0xd3d1d17200000000, 0xc757c4c200000000, 0x1bbb75ff00000000, + 0x7f8ea7b900000000, 0xa362168400000000, 0x5445016f00000000, + 0x88a9b05200000000, 0xec9c621400000000, 0x3070d32900000000, + 0x24f6c69900000000, 0xf81a77a400000000, 0x9c2fa5e200000000, + 0x40c314df00000000, 0xf525ff5900000000, 0x29c94e6400000000, + 0x4dfc9c2200000000, 0x91102d1f00000000, 0x859638af00000000, + 0x597a899200000000, 0x3d4f5bd400000000, 0xe1a3eae900000000, + 0xdb0586b500000000, 0x07e9378800000000, 0x63dce5ce00000000, + 0xbf3054f300000000, 0xabb6414300000000, 0x775af07e00000000, + 0x136f223800000000, 0xcf83930500000000, 0x7a65788300000000, + 0xa689c9be00000000, 0xc2bc1bf800000000, 0x1e50aac500000000, + 0x0ad6bf7500000000, 0xd63a0e4800000000, 0xb20fdc0e00000000, + 0x6ee36d3300000000, 0x99c47ad800000000, 0x4528cbe500000000, + 0x211d19a300000000, 0xfdf1a89e00000000, 0xe977bd2e00000000, + 0x359b0c1300000000, 0x51aede5500000000, 0x8d426f6800000000, + 0x38a484ee00000000, 0xe44835d300000000, 0x807de79500000000, + 0x5c9156a800000000, 0x4817431800000000, 0x94fbf22500000000, + 0xf0ce206300000000, 0x2c22915e00000000, 0x5f877f6e00000000, + 0x836bce5300000000, 0xe75e1c1500000000, 0x3bb2ad2800000000, + 0x2f34b89800000000, 0xf3d809a500000000, 0x97eddbe300000000, + 0x4b016ade00000000, 0xfee7815800000000, 0x220b306500000000, + 0x463ee22300000000, 0x9ad2531e00000000, 0x8e5446ae00000000, + 0x52b8f79300000000, 0x368d25d500000000, 0xea6194e800000000, + 0x1d46830300000000, 0xc1aa323e00000000, 0xa59fe07800000000, + 0x7973514500000000, 0x6df544f500000000, 0xb119f5c800000000, + 0xd52c278e00000000, 0x09c096b300000000, 0xbc267d3500000000, + 0x60cacc0800000000, 0x04ff1e4e00000000, 0xd813af7300000000, + 0xcc95bac300000000, 0x10790bfe00000000, 0x744cd9b800000000, + 0xa8a0688500000000}}; + +#else /* W == 4 */ + +local const z_crc_t FAR crc_braid_table[][256] = { + {0x00000000, 0x81256527, 0xd93bcc0f, 0x581ea928, 0x69069e5f, + 0xe823fb78, 0xb03d5250, 0x31183777, 0xd20d3cbe, 0x53285999, + 0x0b36f0b1, 0x8a139596, 0xbb0ba2e1, 0x3a2ec7c6, 0x62306eee, + 0xe3150bc9, 0x7f6b7f3d, 0xfe4e1a1a, 0xa650b332, 0x2775d615, + 0x166de162, 0x97488445, 0xcf562d6d, 0x4e73484a, 0xad664383, + 0x2c4326a4, 0x745d8f8c, 0xf578eaab, 0xc460dddc, 0x4545b8fb, + 0x1d5b11d3, 0x9c7e74f4, 0xfed6fe7a, 0x7ff39b5d, 0x27ed3275, + 0xa6c85752, 0x97d06025, 0x16f50502, 0x4eebac2a, 0xcfcec90d, + 0x2cdbc2c4, 0xadfea7e3, 0xf5e00ecb, 0x74c56bec, 0x45dd5c9b, + 0xc4f839bc, 0x9ce69094, 0x1dc3f5b3, 0x81bd8147, 0x0098e460, + 0x58864d48, 0xd9a3286f, 0xe8bb1f18, 0x699e7a3f, 0x3180d317, + 0xb0a5b630, 
0x53b0bdf9, 0xd295d8de, 0x8a8b71f6, 0x0bae14d1, + 0x3ab623a6, 0xbb934681, 0xe38defa9, 0x62a88a8e, 0x26dcfab5, + 0xa7f99f92, 0xffe736ba, 0x7ec2539d, 0x4fda64ea, 0xceff01cd, + 0x96e1a8e5, 0x17c4cdc2, 0xf4d1c60b, 0x75f4a32c, 0x2dea0a04, + 0xaccf6f23, 0x9dd75854, 0x1cf23d73, 0x44ec945b, 0xc5c9f17c, + 0x59b78588, 0xd892e0af, 0x808c4987, 0x01a92ca0, 0x30b11bd7, + 0xb1947ef0, 0xe98ad7d8, 0x68afb2ff, 0x8bbab936, 0x0a9fdc11, + 0x52817539, 0xd3a4101e, 0xe2bc2769, 0x6399424e, 0x3b87eb66, + 0xbaa28e41, 0xd80a04cf, 0x592f61e8, 0x0131c8c0, 0x8014ade7, + 0xb10c9a90, 0x3029ffb7, 0x6837569f, 0xe91233b8, 0x0a073871, + 0x8b225d56, 0xd33cf47e, 0x52199159, 0x6301a62e, 0xe224c309, + 0xba3a6a21, 0x3b1f0f06, 0xa7617bf2, 0x26441ed5, 0x7e5ab7fd, + 0xff7fd2da, 0xce67e5ad, 0x4f42808a, 0x175c29a2, 0x96794c85, + 0x756c474c, 0xf449226b, 0xac578b43, 0x2d72ee64, 0x1c6ad913, + 0x9d4fbc34, 0xc551151c, 0x4474703b, 0x4db9f56a, 0xcc9c904d, + 0x94823965, 0x15a75c42, 0x24bf6b35, 0xa59a0e12, 0xfd84a73a, + 0x7ca1c21d, 0x9fb4c9d4, 0x1e91acf3, 0x468f05db, 0xc7aa60fc, + 0xf6b2578b, 0x779732ac, 0x2f899b84, 0xaeacfea3, 0x32d28a57, + 0xb3f7ef70, 0xebe94658, 0x6acc237f, 0x5bd41408, 0xdaf1712f, + 0x82efd807, 0x03cabd20, 0xe0dfb6e9, 0x61fad3ce, 0x39e47ae6, + 0xb8c11fc1, 0x89d928b6, 0x08fc4d91, 0x50e2e4b9, 0xd1c7819e, + 0xb36f0b10, 0x324a6e37, 0x6a54c71f, 0xeb71a238, 0xda69954f, + 0x5b4cf068, 0x03525940, 0x82773c67, 0x616237ae, 0xe0475289, + 0xb859fba1, 0x397c9e86, 0x0864a9f1, 0x8941ccd6, 0xd15f65fe, + 0x507a00d9, 0xcc04742d, 0x4d21110a, 0x153fb822, 0x941add05, + 0xa502ea72, 0x24278f55, 0x7c39267d, 0xfd1c435a, 0x1e094893, + 0x9f2c2db4, 0xc732849c, 0x4617e1bb, 0x770fd6cc, 0xf62ab3eb, + 0xae341ac3, 0x2f117fe4, 0x6b650fdf, 0xea406af8, 0xb25ec3d0, + 0x337ba6f7, 0x02639180, 0x8346f4a7, 0xdb585d8f, 0x5a7d38a8, + 0xb9683361, 0x384d5646, 0x6053ff6e, 0xe1769a49, 0xd06ead3e, + 0x514bc819, 0x09556131, 0x88700416, 0x140e70e2, 0x952b15c5, + 0xcd35bced, 0x4c10d9ca, 0x7d08eebd, 0xfc2d8b9a, 0xa43322b2, + 0x25164795, 0xc6034c5c, 0x4726297b, 0x1f388053, 0x9e1de574, + 0xaf05d203, 0x2e20b724, 0x763e1e0c, 0xf71b7b2b, 0x95b3f1a5, + 0x14969482, 0x4c883daa, 0xcdad588d, 0xfcb56ffa, 0x7d900add, + 0x258ea3f5, 0xa4abc6d2, 0x47becd1b, 0xc69ba83c, 0x9e850114, + 0x1fa06433, 0x2eb85344, 0xaf9d3663, 0xf7839f4b, 0x76a6fa6c, + 0xead88e98, 0x6bfdebbf, 0x33e34297, 0xb2c627b0, 0x83de10c7, + 0x02fb75e0, 0x5ae5dcc8, 0xdbc0b9ef, 0x38d5b226, 0xb9f0d701, + 0xe1ee7e29, 0x60cb1b0e, 0x51d32c79, 0xd0f6495e, 0x88e8e076, + 0x09cd8551}, + {0x00000000, 0x9b73ead4, 0xed96d3e9, 0x76e5393d, 0x005ca193, + 0x9b2f4b47, 0xedca727a, 0x76b998ae, 0x00b94326, 0x9bcaa9f2, + 0xed2f90cf, 0x765c7a1b, 0x00e5e2b5, 0x9b960861, 0xed73315c, + 0x7600db88, 0x0172864c, 0x9a016c98, 0xece455a5, 0x7797bf71, + 0x012e27df, 0x9a5dcd0b, 0xecb8f436, 0x77cb1ee2, 0x01cbc56a, + 0x9ab82fbe, 0xec5d1683, 0x772efc57, 0x019764f9, 0x9ae48e2d, + 0xec01b710, 0x77725dc4, 0x02e50c98, 0x9996e64c, 0xef73df71, + 0x740035a5, 0x02b9ad0b, 0x99ca47df, 0xef2f7ee2, 0x745c9436, + 0x025c4fbe, 0x992fa56a, 0xefca9c57, 0x74b97683, 0x0200ee2d, + 0x997304f9, 0xef963dc4, 0x74e5d710, 0x03978ad4, 0x98e46000, + 0xee01593d, 0x7572b3e9, 0x03cb2b47, 0x98b8c193, 0xee5df8ae, + 0x752e127a, 0x032ec9f2, 0x985d2326, 0xeeb81a1b, 0x75cbf0cf, + 0x03726861, 0x980182b5, 0xeee4bb88, 0x7597515c, 0x05ca1930, + 0x9eb9f3e4, 0xe85ccad9, 0x732f200d, 0x0596b8a3, 0x9ee55277, + 0xe8006b4a, 0x7373819e, 0x05735a16, 0x9e00b0c2, 0xe8e589ff, + 0x7396632b, 0x052ffb85, 0x9e5c1151, 0xe8b9286c, 0x73cac2b8, + 0x04b89f7c, 0x9fcb75a8, 0xe92e4c95, 0x725da641, 0x04e43eef, + 0x9f97d43b, 
0xe972ed06, 0x720107d2, 0x0401dc5a, 0x9f72368e, + 0xe9970fb3, 0x72e4e567, 0x045d7dc9, 0x9f2e971d, 0xe9cbae20, + 0x72b844f4, 0x072f15a8, 0x9c5cff7c, 0xeab9c641, 0x71ca2c95, + 0x0773b43b, 0x9c005eef, 0xeae567d2, 0x71968d06, 0x0796568e, + 0x9ce5bc5a, 0xea008567, 0x71736fb3, 0x07caf71d, 0x9cb91dc9, + 0xea5c24f4, 0x712fce20, 0x065d93e4, 0x9d2e7930, 0xebcb400d, + 0x70b8aad9, 0x06013277, 0x9d72d8a3, 0xeb97e19e, 0x70e40b4a, + 0x06e4d0c2, 0x9d973a16, 0xeb72032b, 0x7001e9ff, 0x06b87151, + 0x9dcb9b85, 0xeb2ea2b8, 0x705d486c, 0x0b943260, 0x90e7d8b4, + 0xe602e189, 0x7d710b5d, 0x0bc893f3, 0x90bb7927, 0xe65e401a, + 0x7d2daace, 0x0b2d7146, 0x905e9b92, 0xe6bba2af, 0x7dc8487b, + 0x0b71d0d5, 0x90023a01, 0xe6e7033c, 0x7d94e9e8, 0x0ae6b42c, + 0x91955ef8, 0xe77067c5, 0x7c038d11, 0x0aba15bf, 0x91c9ff6b, + 0xe72cc656, 0x7c5f2c82, 0x0a5ff70a, 0x912c1dde, 0xe7c924e3, + 0x7cbace37, 0x0a035699, 0x9170bc4d, 0xe7958570, 0x7ce66fa4, + 0x09713ef8, 0x9202d42c, 0xe4e7ed11, 0x7f9407c5, 0x092d9f6b, + 0x925e75bf, 0xe4bb4c82, 0x7fc8a656, 0x09c87dde, 0x92bb970a, + 0xe45eae37, 0x7f2d44e3, 0x0994dc4d, 0x92e73699, 0xe4020fa4, + 0x7f71e570, 0x0803b8b4, 0x93705260, 0xe5956b5d, 0x7ee68189, + 0x085f1927, 0x932cf3f3, 0xe5c9cace, 0x7eba201a, 0x08bafb92, + 0x93c91146, 0xe52c287b, 0x7e5fc2af, 0x08e65a01, 0x9395b0d5, + 0xe57089e8, 0x7e03633c, 0x0e5e2b50, 0x952dc184, 0xe3c8f8b9, + 0x78bb126d, 0x0e028ac3, 0x95716017, 0xe394592a, 0x78e7b3fe, + 0x0ee76876, 0x959482a2, 0xe371bb9f, 0x7802514b, 0x0ebbc9e5, + 0x95c82331, 0xe32d1a0c, 0x785ef0d8, 0x0f2cad1c, 0x945f47c8, + 0xe2ba7ef5, 0x79c99421, 0x0f700c8f, 0x9403e65b, 0xe2e6df66, + 0x799535b2, 0x0f95ee3a, 0x94e604ee, 0xe2033dd3, 0x7970d707, + 0x0fc94fa9, 0x94baa57d, 0xe25f9c40, 0x792c7694, 0x0cbb27c8, + 0x97c8cd1c, 0xe12df421, 0x7a5e1ef5, 0x0ce7865b, 0x97946c8f, + 0xe17155b2, 0x7a02bf66, 0x0c0264ee, 0x97718e3a, 0xe194b707, + 0x7ae75dd3, 0x0c5ec57d, 0x972d2fa9, 0xe1c81694, 0x7abbfc40, + 0x0dc9a184, 0x96ba4b50, 0xe05f726d, 0x7b2c98b9, 0x0d950017, + 0x96e6eac3, 0xe003d3fe, 0x7b70392a, 0x0d70e2a2, 0x96030876, + 0xe0e6314b, 0x7b95db9f, 0x0d2c4331, 0x965fa9e5, 0xe0ba90d8, + 0x7bc97a0c}, + {0x00000000, 0x172864c0, 0x2e50c980, 0x3978ad40, 0x5ca19300, + 0x4b89f7c0, 0x72f15a80, 0x65d93e40, 0xb9432600, 0xae6b42c0, + 0x9713ef80, 0x803b8b40, 0xe5e2b500, 0xf2cad1c0, 0xcbb27c80, + 0xdc9a1840, 0xa9f74a41, 0xbedf2e81, 0x87a783c1, 0x908fe701, + 0xf556d941, 0xe27ebd81, 0xdb0610c1, 0xcc2e7401, 0x10b46c41, + 0x079c0881, 0x3ee4a5c1, 0x29ccc101, 0x4c15ff41, 0x5b3d9b81, + 0x624536c1, 0x756d5201, 0x889f92c3, 0x9fb7f603, 0xa6cf5b43, + 0xb1e73f83, 0xd43e01c3, 0xc3166503, 0xfa6ec843, 0xed46ac83, + 0x31dcb4c3, 0x26f4d003, 0x1f8c7d43, 0x08a41983, 0x6d7d27c3, + 0x7a554303, 0x432dee43, 0x54058a83, 0x2168d882, 0x3640bc42, + 0x0f381102, 0x181075c2, 0x7dc94b82, 0x6ae12f42, 0x53998202, + 0x44b1e6c2, 0x982bfe82, 0x8f039a42, 0xb67b3702, 0xa15353c2, + 0xc48a6d82, 0xd3a20942, 0xeadaa402, 0xfdf2c0c2, 0xca4e23c7, + 0xdd664707, 0xe41eea47, 0xf3368e87, 0x96efb0c7, 0x81c7d407, + 0xb8bf7947, 0xaf971d87, 0x730d05c7, 0x64256107, 0x5d5dcc47, + 0x4a75a887, 0x2fac96c7, 0x3884f207, 0x01fc5f47, 0x16d43b87, + 0x63b96986, 0x74910d46, 0x4de9a006, 0x5ac1c4c6, 0x3f18fa86, + 0x28309e46, 0x11483306, 0x066057c6, 0xdafa4f86, 0xcdd22b46, + 0xf4aa8606, 0xe382e2c6, 0x865bdc86, 0x9173b846, 0xa80b1506, + 0xbf2371c6, 0x42d1b104, 0x55f9d5c4, 0x6c817884, 0x7ba91c44, + 0x1e702204, 0x095846c4, 0x3020eb84, 0x27088f44, 0xfb929704, + 0xecbaf3c4, 0xd5c25e84, 0xc2ea3a44, 0xa7330404, 0xb01b60c4, + 0x8963cd84, 0x9e4ba944, 0xeb26fb45, 0xfc0e9f85, 0xc57632c5, + 0xd25e5605, 
0xb7876845, 0xa0af0c85, 0x99d7a1c5, 0x8effc505, + 0x5265dd45, 0x454db985, 0x7c3514c5, 0x6b1d7005, 0x0ec44e45, + 0x19ec2a85, 0x209487c5, 0x37bce305, 0x4fed41cf, 0x58c5250f, + 0x61bd884f, 0x7695ec8f, 0x134cd2cf, 0x0464b60f, 0x3d1c1b4f, + 0x2a347f8f, 0xf6ae67cf, 0xe186030f, 0xd8feae4f, 0xcfd6ca8f, + 0xaa0ff4cf, 0xbd27900f, 0x845f3d4f, 0x9377598f, 0xe61a0b8e, + 0xf1326f4e, 0xc84ac20e, 0xdf62a6ce, 0xbabb988e, 0xad93fc4e, + 0x94eb510e, 0x83c335ce, 0x5f592d8e, 0x4871494e, 0x7109e40e, + 0x662180ce, 0x03f8be8e, 0x14d0da4e, 0x2da8770e, 0x3a8013ce, + 0xc772d30c, 0xd05ab7cc, 0xe9221a8c, 0xfe0a7e4c, 0x9bd3400c, + 0x8cfb24cc, 0xb583898c, 0xa2abed4c, 0x7e31f50c, 0x691991cc, + 0x50613c8c, 0x4749584c, 0x2290660c, 0x35b802cc, 0x0cc0af8c, + 0x1be8cb4c, 0x6e85994d, 0x79adfd8d, 0x40d550cd, 0x57fd340d, + 0x32240a4d, 0x250c6e8d, 0x1c74c3cd, 0x0b5ca70d, 0xd7c6bf4d, + 0xc0eedb8d, 0xf99676cd, 0xeebe120d, 0x8b672c4d, 0x9c4f488d, + 0xa537e5cd, 0xb21f810d, 0x85a36208, 0x928b06c8, 0xabf3ab88, + 0xbcdbcf48, 0xd902f108, 0xce2a95c8, 0xf7523888, 0xe07a5c48, + 0x3ce04408, 0x2bc820c8, 0x12b08d88, 0x0598e948, 0x6041d708, + 0x7769b3c8, 0x4e111e88, 0x59397a48, 0x2c542849, 0x3b7c4c89, + 0x0204e1c9, 0x152c8509, 0x70f5bb49, 0x67dddf89, 0x5ea572c9, + 0x498d1609, 0x95170e49, 0x823f6a89, 0xbb47c7c9, 0xac6fa309, + 0xc9b69d49, 0xde9ef989, 0xe7e654c9, 0xf0ce3009, 0x0d3cf0cb, + 0x1a14940b, 0x236c394b, 0x34445d8b, 0x519d63cb, 0x46b5070b, + 0x7fcdaa4b, 0x68e5ce8b, 0xb47fd6cb, 0xa357b20b, 0x9a2f1f4b, + 0x8d077b8b, 0xe8de45cb, 0xfff6210b, 0xc68e8c4b, 0xd1a6e88b, + 0xa4cbba8a, 0xb3e3de4a, 0x8a9b730a, 0x9db317ca, 0xf86a298a, + 0xef424d4a, 0xd63ae00a, 0xc11284ca, 0x1d889c8a, 0x0aa0f84a, + 0x33d8550a, 0x24f031ca, 0x41290f8a, 0x56016b4a, 0x6f79c60a, + 0x7851a2ca}, + {0x00000000, 0x9fda839e, 0xe4c4017d, 0x7b1e82e3, 0x12f904bb, + 0x8d238725, 0xf63d05c6, 0x69e78658, 0x25f20976, 0xba288ae8, + 0xc136080b, 0x5eec8b95, 0x370b0dcd, 0xa8d18e53, 0xd3cf0cb0, + 0x4c158f2e, 0x4be412ec, 0xd43e9172, 0xaf201391, 0x30fa900f, + 0x591d1657, 0xc6c795c9, 0xbdd9172a, 0x220394b4, 0x6e161b9a, + 0xf1cc9804, 0x8ad21ae7, 0x15089979, 0x7cef1f21, 0xe3359cbf, + 0x982b1e5c, 0x07f19dc2, 0x97c825d8, 0x0812a646, 0x730c24a5, + 0xecd6a73b, 0x85312163, 0x1aeba2fd, 0x61f5201e, 0xfe2fa380, + 0xb23a2cae, 0x2de0af30, 0x56fe2dd3, 0xc924ae4d, 0xa0c32815, + 0x3f19ab8b, 0x44072968, 0xdbddaaf6, 0xdc2c3734, 0x43f6b4aa, + 0x38e83649, 0xa732b5d7, 0xced5338f, 0x510fb011, 0x2a1132f2, + 0xb5cbb16c, 0xf9de3e42, 0x6604bddc, 0x1d1a3f3f, 0x82c0bca1, + 0xeb273af9, 0x74fdb967, 0x0fe33b84, 0x9039b81a, 0xf4e14df1, + 0x6b3bce6f, 0x10254c8c, 0x8fffcf12, 0xe618494a, 0x79c2cad4, + 0x02dc4837, 0x9d06cba9, 0xd1134487, 0x4ec9c719, 0x35d745fa, + 0xaa0dc664, 0xc3ea403c, 0x5c30c3a2, 0x272e4141, 0xb8f4c2df, + 0xbf055f1d, 0x20dfdc83, 0x5bc15e60, 0xc41bddfe, 0xadfc5ba6, + 0x3226d838, 0x49385adb, 0xd6e2d945, 0x9af7566b, 0x052dd5f5, + 0x7e335716, 0xe1e9d488, 0x880e52d0, 0x17d4d14e, 0x6cca53ad, + 0xf310d033, 0x63296829, 0xfcf3ebb7, 0x87ed6954, 0x1837eaca, + 0x71d06c92, 0xee0aef0c, 0x95146def, 0x0aceee71, 0x46db615f, + 0xd901e2c1, 0xa21f6022, 0x3dc5e3bc, 0x542265e4, 0xcbf8e67a, + 0xb0e66499, 0x2f3ce707, 0x28cd7ac5, 0xb717f95b, 0xcc097bb8, + 0x53d3f826, 0x3a347e7e, 0xa5eefde0, 0xdef07f03, 0x412afc9d, + 0x0d3f73b3, 0x92e5f02d, 0xe9fb72ce, 0x7621f150, 0x1fc67708, + 0x801cf496, 0xfb027675, 0x64d8f5eb, 0x32b39da3, 0xad691e3d, + 0xd6779cde, 0x49ad1f40, 0x204a9918, 0xbf901a86, 0xc48e9865, + 0x5b541bfb, 0x174194d5, 0x889b174b, 0xf38595a8, 0x6c5f1636, + 0x05b8906e, 0x9a6213f0, 0xe17c9113, 0x7ea6128d, 0x79578f4f, + 0xe68d0cd1, 
0x9d938e32, 0x02490dac, 0x6bae8bf4, 0xf474086a, + 0x8f6a8a89, 0x10b00917, 0x5ca58639, 0xc37f05a7, 0xb8618744, + 0x27bb04da, 0x4e5c8282, 0xd186011c, 0xaa9883ff, 0x35420061, + 0xa57bb87b, 0x3aa13be5, 0x41bfb906, 0xde653a98, 0xb782bcc0, + 0x28583f5e, 0x5346bdbd, 0xcc9c3e23, 0x8089b10d, 0x1f533293, + 0x644db070, 0xfb9733ee, 0x9270b5b6, 0x0daa3628, 0x76b4b4cb, + 0xe96e3755, 0xee9faa97, 0x71452909, 0x0a5babea, 0x95812874, + 0xfc66ae2c, 0x63bc2db2, 0x18a2af51, 0x87782ccf, 0xcb6da3e1, + 0x54b7207f, 0x2fa9a29c, 0xb0732102, 0xd994a75a, 0x464e24c4, + 0x3d50a627, 0xa28a25b9, 0xc652d052, 0x598853cc, 0x2296d12f, + 0xbd4c52b1, 0xd4abd4e9, 0x4b715777, 0x306fd594, 0xafb5560a, + 0xe3a0d924, 0x7c7a5aba, 0x0764d859, 0x98be5bc7, 0xf159dd9f, + 0x6e835e01, 0x159ddce2, 0x8a475f7c, 0x8db6c2be, 0x126c4120, + 0x6972c3c3, 0xf6a8405d, 0x9f4fc605, 0x0095459b, 0x7b8bc778, + 0xe45144e6, 0xa844cbc8, 0x379e4856, 0x4c80cab5, 0xd35a492b, + 0xbabdcf73, 0x25674ced, 0x5e79ce0e, 0xc1a34d90, 0x519af58a, + 0xce407614, 0xb55ef4f7, 0x2a847769, 0x4363f131, 0xdcb972af, + 0xa7a7f04c, 0x387d73d2, 0x7468fcfc, 0xebb27f62, 0x90acfd81, + 0x0f767e1f, 0x6691f847, 0xf94b7bd9, 0x8255f93a, 0x1d8f7aa4, + 0x1a7ee766, 0x85a464f8, 0xfebae61b, 0x61606585, 0x0887e3dd, + 0x975d6043, 0xec43e2a0, 0x7399613e, 0x3f8cee10, 0xa0566d8e, + 0xdb48ef6d, 0x44926cf3, 0x2d75eaab, 0xb2af6935, 0xc9b1ebd6, + 0x566b6848}}; + +local const z_word_t FAR crc_braid_big_table[][256] = { + {0x00000000, 0x9e83da9f, 0x7d01c4e4, 0xe3821e7b, 0xbb04f912, + 0x2587238d, 0xc6053df6, 0x5886e769, 0x7609f225, 0xe88a28ba, + 0x0b0836c1, 0x958bec5e, 0xcd0d0b37, 0x538ed1a8, 0xb00ccfd3, + 0x2e8f154c, 0xec12e44b, 0x72913ed4, 0x911320af, 0x0f90fa30, + 0x57161d59, 0xc995c7c6, 0x2a17d9bd, 0xb4940322, 0x9a1b166e, + 0x0498ccf1, 0xe71ad28a, 0x79990815, 0x211fef7c, 0xbf9c35e3, + 0x5c1e2b98, 0xc29df107, 0xd825c897, 0x46a61208, 0xa5240c73, + 0x3ba7d6ec, 0x63213185, 0xfda2eb1a, 0x1e20f561, 0x80a32ffe, + 0xae2c3ab2, 0x30afe02d, 0xd32dfe56, 0x4dae24c9, 0x1528c3a0, + 0x8bab193f, 0x68290744, 0xf6aadddb, 0x34372cdc, 0xaab4f643, + 0x4936e838, 0xd7b532a7, 0x8f33d5ce, 0x11b00f51, 0xf232112a, + 0x6cb1cbb5, 0x423edef9, 0xdcbd0466, 0x3f3f1a1d, 0xa1bcc082, + 0xf93a27eb, 0x67b9fd74, 0x843be30f, 0x1ab83990, 0xf14de1f4, + 0x6fce3b6b, 0x8c4c2510, 0x12cfff8f, 0x4a4918e6, 0xd4cac279, + 0x3748dc02, 0xa9cb069d, 0x874413d1, 0x19c7c94e, 0xfa45d735, + 0x64c60daa, 0x3c40eac3, 0xa2c3305c, 0x41412e27, 0xdfc2f4b8, + 0x1d5f05bf, 0x83dcdf20, 0x605ec15b, 0xfedd1bc4, 0xa65bfcad, + 0x38d82632, 0xdb5a3849, 0x45d9e2d6, 0x6b56f79a, 0xf5d52d05, + 0x1657337e, 0x88d4e9e1, 0xd0520e88, 0x4ed1d417, 0xad53ca6c, + 0x33d010f3, 0x29682963, 0xb7ebf3fc, 0x5469ed87, 0xcaea3718, + 0x926cd071, 0x0cef0aee, 0xef6d1495, 0x71eece0a, 0x5f61db46, + 0xc1e201d9, 0x22601fa2, 0xbce3c53d, 0xe4652254, 0x7ae6f8cb, + 0x9964e6b0, 0x07e73c2f, 0xc57acd28, 0x5bf917b7, 0xb87b09cc, + 0x26f8d353, 0x7e7e343a, 0xe0fdeea5, 0x037ff0de, 0x9dfc2a41, + 0xb3733f0d, 0x2df0e592, 0xce72fbe9, 0x50f12176, 0x0877c61f, + 0x96f41c80, 0x757602fb, 0xebf5d864, 0xa39db332, 0x3d1e69ad, + 0xde9c77d6, 0x401fad49, 0x18994a20, 0x861a90bf, 0x65988ec4, + 0xfb1b545b, 0xd5944117, 0x4b179b88, 0xa89585f3, 0x36165f6c, + 0x6e90b805, 0xf013629a, 0x13917ce1, 0x8d12a67e, 0x4f8f5779, + 0xd10c8de6, 0x328e939d, 0xac0d4902, 0xf48bae6b, 0x6a0874f4, + 0x898a6a8f, 0x1709b010, 0x3986a55c, 0xa7057fc3, 0x448761b8, + 0xda04bb27, 0x82825c4e, 0x1c0186d1, 0xff8398aa, 0x61004235, + 0x7bb87ba5, 0xe53ba13a, 0x06b9bf41, 0x983a65de, 0xc0bc82b7, + 0x5e3f5828, 0xbdbd4653, 0x233e9ccc, 0x0db18980, 0x9332531f, + 0x70b04d64, 
0xee3397fb, 0xb6b57092, 0x2836aa0d, 0xcbb4b476, + 0x55376ee9, 0x97aa9fee, 0x09294571, 0xeaab5b0a, 0x74288195, + 0x2cae66fc, 0xb22dbc63, 0x51afa218, 0xcf2c7887, 0xe1a36dcb, + 0x7f20b754, 0x9ca2a92f, 0x022173b0, 0x5aa794d9, 0xc4244e46, + 0x27a6503d, 0xb9258aa2, 0x52d052c6, 0xcc538859, 0x2fd19622, + 0xb1524cbd, 0xe9d4abd4, 0x7757714b, 0x94d56f30, 0x0a56b5af, + 0x24d9a0e3, 0xba5a7a7c, 0x59d86407, 0xc75bbe98, 0x9fdd59f1, + 0x015e836e, 0xe2dc9d15, 0x7c5f478a, 0xbec2b68d, 0x20416c12, + 0xc3c37269, 0x5d40a8f6, 0x05c64f9f, 0x9b459500, 0x78c78b7b, + 0xe64451e4, 0xc8cb44a8, 0x56489e37, 0xb5ca804c, 0x2b495ad3, + 0x73cfbdba, 0xed4c6725, 0x0ece795e, 0x904da3c1, 0x8af59a51, + 0x147640ce, 0xf7f45eb5, 0x6977842a, 0x31f16343, 0xaf72b9dc, + 0x4cf0a7a7, 0xd2737d38, 0xfcfc6874, 0x627fb2eb, 0x81fdac90, + 0x1f7e760f, 0x47f89166, 0xd97b4bf9, 0x3af95582, 0xa47a8f1d, + 0x66e77e1a, 0xf864a485, 0x1be6bafe, 0x85656061, 0xdde38708, + 0x43605d97, 0xa0e243ec, 0x3e619973, 0x10ee8c3f, 0x8e6d56a0, + 0x6def48db, 0xf36c9244, 0xabea752d, 0x3569afb2, 0xd6ebb1c9, + 0x48686b56}, + {0x00000000, 0xc0642817, 0x80c9502e, 0x40ad7839, 0x0093a15c, + 0xc0f7894b, 0x805af172, 0x403ed965, 0x002643b9, 0xc0426bae, + 0x80ef1397, 0x408b3b80, 0x00b5e2e5, 0xc0d1caf2, 0x807cb2cb, + 0x40189adc, 0x414af7a9, 0x812edfbe, 0xc183a787, 0x01e78f90, + 0x41d956f5, 0x81bd7ee2, 0xc11006db, 0x01742ecc, 0x416cb410, + 0x81089c07, 0xc1a5e43e, 0x01c1cc29, 0x41ff154c, 0x819b3d5b, + 0xc1364562, 0x01526d75, 0xc3929f88, 0x03f6b79f, 0x435bcfa6, + 0x833fe7b1, 0xc3013ed4, 0x036516c3, 0x43c86efa, 0x83ac46ed, + 0xc3b4dc31, 0x03d0f426, 0x437d8c1f, 0x8319a408, 0xc3277d6d, + 0x0343557a, 0x43ee2d43, 0x838a0554, 0x82d86821, 0x42bc4036, + 0x0211380f, 0xc2751018, 0x824bc97d, 0x422fe16a, 0x02829953, + 0xc2e6b144, 0x82fe2b98, 0x429a038f, 0x02377bb6, 0xc25353a1, + 0x826d8ac4, 0x4209a2d3, 0x02a4daea, 0xc2c0f2fd, 0xc7234eca, + 0x074766dd, 0x47ea1ee4, 0x878e36f3, 0xc7b0ef96, 0x07d4c781, + 0x4779bfb8, 0x871d97af, 0xc7050d73, 0x07612564, 0x47cc5d5d, + 0x87a8754a, 0xc796ac2f, 0x07f28438, 0x475ffc01, 0x873bd416, + 0x8669b963, 0x460d9174, 0x06a0e94d, 0xc6c4c15a, 0x86fa183f, + 0x469e3028, 0x06334811, 0xc6576006, 0x864ffada, 0x462bd2cd, + 0x0686aaf4, 0xc6e282e3, 0x86dc5b86, 0x46b87391, 0x06150ba8, + 0xc67123bf, 0x04b1d142, 0xc4d5f955, 0x8478816c, 0x441ca97b, + 0x0422701e, 0xc4465809, 0x84eb2030, 0x448f0827, 0x049792fb, + 0xc4f3baec, 0x845ec2d5, 0x443aeac2, 0x040433a7, 0xc4601bb0, + 0x84cd6389, 0x44a94b9e, 0x45fb26eb, 0x859f0efc, 0xc53276c5, + 0x05565ed2, 0x456887b7, 0x850cafa0, 0xc5a1d799, 0x05c5ff8e, + 0x45dd6552, 0x85b94d45, 0xc514357c, 0x05701d6b, 0x454ec40e, + 0x852aec19, 0xc5879420, 0x05e3bc37, 0xcf41ed4f, 0x0f25c558, + 0x4f88bd61, 0x8fec9576, 0xcfd24c13, 0x0fb66404, 0x4f1b1c3d, + 0x8f7f342a, 0xcf67aef6, 0x0f0386e1, 0x4faefed8, 0x8fcad6cf, + 0xcff40faa, 0x0f9027bd, 0x4f3d5f84, 0x8f597793, 0x8e0b1ae6, + 0x4e6f32f1, 0x0ec24ac8, 0xcea662df, 0x8e98bbba, 0x4efc93ad, + 0x0e51eb94, 0xce35c383, 0x8e2d595f, 0x4e497148, 0x0ee40971, + 0xce802166, 0x8ebef803, 0x4edad014, 0x0e77a82d, 0xce13803a, + 0x0cd372c7, 0xccb75ad0, 0x8c1a22e9, 0x4c7e0afe, 0x0c40d39b, + 0xcc24fb8c, 0x8c8983b5, 0x4cedaba2, 0x0cf5317e, 0xcc911969, + 0x8c3c6150, 0x4c584947, 0x0c669022, 0xcc02b835, 0x8cafc00c, + 0x4ccbe81b, 0x4d99856e, 0x8dfdad79, 0xcd50d540, 0x0d34fd57, + 0x4d0a2432, 0x8d6e0c25, 0xcdc3741c, 0x0da75c0b, 0x4dbfc6d7, + 0x8ddbeec0, 0xcd7696f9, 0x0d12beee, 0x4d2c678b, 0x8d484f9c, + 0xcde537a5, 0x0d811fb2, 0x0862a385, 0xc8068b92, 0x88abf3ab, + 0x48cfdbbc, 0x08f102d9, 0xc8952ace, 0x883852f7, 0x485c7ae0, + 0x0844e03c, 
0xc820c82b, 0x888db012, 0x48e99805, 0x08d74160, + 0xc8b36977, 0x881e114e, 0x487a3959, 0x4928542c, 0x894c7c3b, + 0xc9e10402, 0x09852c15, 0x49bbf570, 0x89dfdd67, 0xc972a55e, + 0x09168d49, 0x490e1795, 0x896a3f82, 0xc9c747bb, 0x09a36fac, + 0x499db6c9, 0x89f99ede, 0xc954e6e7, 0x0930cef0, 0xcbf03c0d, + 0x0b94141a, 0x4b396c23, 0x8b5d4434, 0xcb639d51, 0x0b07b546, + 0x4baacd7f, 0x8bcee568, 0xcbd67fb4, 0x0bb257a3, 0x4b1f2f9a, + 0x8b7b078d, 0xcb45dee8, 0x0b21f6ff, 0x4b8c8ec6, 0x8be8a6d1, + 0x8abacba4, 0x4adee3b3, 0x0a739b8a, 0xca17b39d, 0x8a296af8, + 0x4a4d42ef, 0x0ae03ad6, 0xca8412c1, 0x8a9c881d, 0x4af8a00a, + 0x0a55d833, 0xca31f024, 0x8a0f2941, 0x4a6b0156, 0x0ac6796f, + 0xcaa25178}, + {0x00000000, 0xd4ea739b, 0xe9d396ed, 0x3d39e576, 0x93a15c00, + 0x474b2f9b, 0x7a72caed, 0xae98b976, 0x2643b900, 0xf2a9ca9b, + 0xcf902fed, 0x1b7a5c76, 0xb5e2e500, 0x6108969b, 0x5c3173ed, + 0x88db0076, 0x4c867201, 0x986c019a, 0xa555e4ec, 0x71bf9777, + 0xdf272e01, 0x0bcd5d9a, 0x36f4b8ec, 0xe21ecb77, 0x6ac5cb01, + 0xbe2fb89a, 0x83165dec, 0x57fc2e77, 0xf9649701, 0x2d8ee49a, + 0x10b701ec, 0xc45d7277, 0x980ce502, 0x4ce69699, 0x71df73ef, + 0xa5350074, 0x0badb902, 0xdf47ca99, 0xe27e2fef, 0x36945c74, + 0xbe4f5c02, 0x6aa52f99, 0x579ccaef, 0x8376b974, 0x2dee0002, + 0xf9047399, 0xc43d96ef, 0x10d7e574, 0xd48a9703, 0x0060e498, + 0x3d5901ee, 0xe9b37275, 0x472bcb03, 0x93c1b898, 0xaef85dee, + 0x7a122e75, 0xf2c92e03, 0x26235d98, 0x1b1ab8ee, 0xcff0cb75, + 0x61687203, 0xb5820198, 0x88bbe4ee, 0x5c519775, 0x3019ca05, + 0xe4f3b99e, 0xd9ca5ce8, 0x0d202f73, 0xa3b89605, 0x7752e59e, + 0x4a6b00e8, 0x9e817373, 0x165a7305, 0xc2b0009e, 0xff89e5e8, + 0x2b639673, 0x85fb2f05, 0x51115c9e, 0x6c28b9e8, 0xb8c2ca73, + 0x7c9fb804, 0xa875cb9f, 0x954c2ee9, 0x41a65d72, 0xef3ee404, + 0x3bd4979f, 0x06ed72e9, 0xd2070172, 0x5adc0104, 0x8e36729f, + 0xb30f97e9, 0x67e5e472, 0xc97d5d04, 0x1d972e9f, 0x20aecbe9, + 0xf444b872, 0xa8152f07, 0x7cff5c9c, 0x41c6b9ea, 0x952cca71, + 0x3bb47307, 0xef5e009c, 0xd267e5ea, 0x068d9671, 0x8e569607, + 0x5abce59c, 0x678500ea, 0xb36f7371, 0x1df7ca07, 0xc91db99c, + 0xf4245cea, 0x20ce2f71, 0xe4935d06, 0x30792e9d, 0x0d40cbeb, + 0xd9aab870, 0x77320106, 0xa3d8729d, 0x9ee197eb, 0x4a0be470, + 0xc2d0e406, 0x163a979d, 0x2b0372eb, 0xffe90170, 0x5171b806, + 0x859bcb9d, 0xb8a22eeb, 0x6c485d70, 0x6032940b, 0xb4d8e790, + 0x89e102e6, 0x5d0b717d, 0xf393c80b, 0x2779bb90, 0x1a405ee6, + 0xceaa2d7d, 0x46712d0b, 0x929b5e90, 0xafa2bbe6, 0x7b48c87d, + 0xd5d0710b, 0x013a0290, 0x3c03e7e6, 0xe8e9947d, 0x2cb4e60a, + 0xf85e9591, 0xc56770e7, 0x118d037c, 0xbf15ba0a, 0x6bffc991, + 0x56c62ce7, 0x822c5f7c, 0x0af75f0a, 0xde1d2c91, 0xe324c9e7, + 0x37ceba7c, 0x9956030a, 0x4dbc7091, 0x708595e7, 0xa46fe67c, + 0xf83e7109, 0x2cd40292, 0x11ede7e4, 0xc507947f, 0x6b9f2d09, + 0xbf755e92, 0x824cbbe4, 0x56a6c87f, 0xde7dc809, 0x0a97bb92, + 0x37ae5ee4, 0xe3442d7f, 0x4ddc9409, 0x9936e792, 0xa40f02e4, + 0x70e5717f, 0xb4b80308, 0x60527093, 0x5d6b95e5, 0x8981e67e, + 0x27195f08, 0xf3f32c93, 0xcecac9e5, 0x1a20ba7e, 0x92fbba08, + 0x4611c993, 0x7b282ce5, 0xafc25f7e, 0x015ae608, 0xd5b09593, + 0xe88970e5, 0x3c63037e, 0x502b5e0e, 0x84c12d95, 0xb9f8c8e3, + 0x6d12bb78, 0xc38a020e, 0x17607195, 0x2a5994e3, 0xfeb3e778, + 0x7668e70e, 0xa2829495, 0x9fbb71e3, 0x4b510278, 0xe5c9bb0e, + 0x3123c895, 0x0c1a2de3, 0xd8f05e78, 0x1cad2c0f, 0xc8475f94, + 0xf57ebae2, 0x2194c979, 0x8f0c700f, 0x5be60394, 0x66dfe6e2, + 0xb2359579, 0x3aee950f, 0xee04e694, 0xd33d03e2, 0x07d77079, + 0xa94fc90f, 0x7da5ba94, 0x409c5fe2, 0x94762c79, 0xc827bb0c, + 0x1ccdc897, 0x21f42de1, 0xf51e5e7a, 0x5b86e70c, 0x8f6c9497, + 0xb25571e1, 
0x66bf027a, 0xee64020c, 0x3a8e7197, 0x07b794e1, + 0xd35de77a, 0x7dc55e0c, 0xa92f2d97, 0x9416c8e1, 0x40fcbb7a, + 0x84a1c90d, 0x504bba96, 0x6d725fe0, 0xb9982c7b, 0x1700950d, + 0xc3eae696, 0xfed303e0, 0x2a39707b, 0xa2e2700d, 0x76080396, + 0x4b31e6e0, 0x9fdb957b, 0x31432c0d, 0xe5a95f96, 0xd890bae0, + 0x0c7ac97b}, + {0x00000000, 0x27652581, 0x0fcc3bd9, 0x28a91e58, 0x5f9e0669, + 0x78fb23e8, 0x50523db0, 0x77371831, 0xbe3c0dd2, 0x99592853, + 0xb1f0360b, 0x9695138a, 0xe1a20bbb, 0xc6c72e3a, 0xee6e3062, + 0xc90b15e3, 0x3d7f6b7f, 0x1a1a4efe, 0x32b350a6, 0x15d67527, + 0x62e16d16, 0x45844897, 0x6d2d56cf, 0x4a48734e, 0x834366ad, + 0xa426432c, 0x8c8f5d74, 0xabea78f5, 0xdcdd60c4, 0xfbb84545, + 0xd3115b1d, 0xf4747e9c, 0x7afed6fe, 0x5d9bf37f, 0x7532ed27, + 0x5257c8a6, 0x2560d097, 0x0205f516, 0x2aaceb4e, 0x0dc9cecf, + 0xc4c2db2c, 0xe3a7fead, 0xcb0ee0f5, 0xec6bc574, 0x9b5cdd45, + 0xbc39f8c4, 0x9490e69c, 0xb3f5c31d, 0x4781bd81, 0x60e49800, + 0x484d8658, 0x6f28a3d9, 0x181fbbe8, 0x3f7a9e69, 0x17d38031, + 0x30b6a5b0, 0xf9bdb053, 0xded895d2, 0xf6718b8a, 0xd114ae0b, + 0xa623b63a, 0x814693bb, 0xa9ef8de3, 0x8e8aa862, 0xb5fadc26, + 0x929ff9a7, 0xba36e7ff, 0x9d53c27e, 0xea64da4f, 0xcd01ffce, + 0xe5a8e196, 0xc2cdc417, 0x0bc6d1f4, 0x2ca3f475, 0x040aea2d, + 0x236fcfac, 0x5458d79d, 0x733df21c, 0x5b94ec44, 0x7cf1c9c5, + 0x8885b759, 0xafe092d8, 0x87498c80, 0xa02ca901, 0xd71bb130, + 0xf07e94b1, 0xd8d78ae9, 0xffb2af68, 0x36b9ba8b, 0x11dc9f0a, + 0x39758152, 0x1e10a4d3, 0x6927bce2, 0x4e429963, 0x66eb873b, + 0x418ea2ba, 0xcf040ad8, 0xe8612f59, 0xc0c83101, 0xe7ad1480, + 0x909a0cb1, 0xb7ff2930, 0x9f563768, 0xb83312e9, 0x7138070a, + 0x565d228b, 0x7ef43cd3, 0x59911952, 0x2ea60163, 0x09c324e2, + 0x216a3aba, 0x060f1f3b, 0xf27b61a7, 0xd51e4426, 0xfdb75a7e, + 0xdad27fff, 0xade567ce, 0x8a80424f, 0xa2295c17, 0x854c7996, + 0x4c476c75, 0x6b2249f4, 0x438b57ac, 0x64ee722d, 0x13d96a1c, + 0x34bc4f9d, 0x1c1551c5, 0x3b707444, 0x6af5b94d, 0x4d909ccc, + 0x65398294, 0x425ca715, 0x356bbf24, 0x120e9aa5, 0x3aa784fd, + 0x1dc2a17c, 0xd4c9b49f, 0xf3ac911e, 0xdb058f46, 0xfc60aac7, + 0x8b57b2f6, 0xac329777, 0x849b892f, 0xa3feacae, 0x578ad232, + 0x70eff7b3, 0x5846e9eb, 0x7f23cc6a, 0x0814d45b, 0x2f71f1da, + 0x07d8ef82, 0x20bdca03, 0xe9b6dfe0, 0xced3fa61, 0xe67ae439, + 0xc11fc1b8, 0xb628d989, 0x914dfc08, 0xb9e4e250, 0x9e81c7d1, + 0x100b6fb3, 0x376e4a32, 0x1fc7546a, 0x38a271eb, 0x4f9569da, + 0x68f04c5b, 0x40595203, 0x673c7782, 0xae376261, 0x895247e0, + 0xa1fb59b8, 0x869e7c39, 0xf1a96408, 0xd6cc4189, 0xfe655fd1, + 0xd9007a50, 0x2d7404cc, 0x0a11214d, 0x22b83f15, 0x05dd1a94, + 0x72ea02a5, 0x558f2724, 0x7d26397c, 0x5a431cfd, 0x9348091e, + 0xb42d2c9f, 0x9c8432c7, 0xbbe11746, 0xccd60f77, 0xebb32af6, + 0xc31a34ae, 0xe47f112f, 0xdf0f656b, 0xf86a40ea, 0xd0c35eb2, + 0xf7a67b33, 0x80916302, 0xa7f44683, 0x8f5d58db, 0xa8387d5a, + 0x613368b9, 0x46564d38, 0x6eff5360, 0x499a76e1, 0x3ead6ed0, + 0x19c84b51, 0x31615509, 0x16047088, 0xe2700e14, 0xc5152b95, + 0xedbc35cd, 0xcad9104c, 0xbdee087d, 0x9a8b2dfc, 0xb22233a4, + 0x95471625, 0x5c4c03c6, 0x7b292647, 0x5380381f, 0x74e51d9e, + 0x03d205af, 0x24b7202e, 0x0c1e3e76, 0x2b7b1bf7, 0xa5f1b395, + 0x82949614, 0xaa3d884c, 0x8d58adcd, 0xfa6fb5fc, 0xdd0a907d, + 0xf5a38e25, 0xd2c6aba4, 0x1bcdbe47, 0x3ca89bc6, 0x1401859e, + 0x3364a01f, 0x4453b82e, 0x63369daf, 0x4b9f83f7, 0x6cfaa676, + 0x988ed8ea, 0xbfebfd6b, 0x9742e333, 0xb027c6b2, 0xc710de83, + 0xe075fb02, 0xc8dce55a, 0xefb9c0db, 0x26b2d538, 0x01d7f0b9, + 0x297eeee1, 0x0e1bcb60, 0x792cd351, 0x5e49f6d0, 0x76e0e888, + 0x5185cd09}}; + +#endif + +#endif + +#endif + +local const z_crc_t FAR 
x2n_table[] = { + 0x40000000, 0x20000000, 0x08000000, 0x00800000, 0x00008000, + 0xedb88320, 0xb1e6b092, 0xa06a2517, 0xed627dae, 0x88d14467, + 0xd7bbfe6a, 0xec447f11, 0x8e7ea170, 0x6427800e, 0x4d47bae0, + 0x09fe548f, 0x83852d0f, 0x30362f1a, 0x7b5a9cc3, 0x31fec169, + 0x9fec022a, 0x6c8dedc4, 0x15d6874d, 0x5fde7a4e, 0xbad90e37, + 0x2e4e5eef, 0x4eaba214, 0xa8a472c0, 0x429a969e, 0x148d302a, + 0xc40ba6d0, 0xc4e22c3c}; diff --git a/c-blosc/internal-complibs/zlib-1.3.1/deflate.c b/c-blosc/internal-complibs/zlib-1.3.1/deflate.c new file mode 100644 index 0000000..012ea81 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/deflate.c @@ -0,0 +1,2139 @@ +/* deflate.c -- compress data using the deflation algorithm + * Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process depends on being able to identify portions + * of the input text which are identical to earlier input (within a + * sliding window trailing behind the input currently being processed). + * + * The most straightforward technique turns out to be the fastest for + * most input files: try all possible matches and select the longest. + * The key feature of this algorithm is that insertions into the string + * dictionary are very simple and thus fast, and deletions are avoided + * completely. Insertions are performed at each input character, whereas + * string matches are performed only when the previous match ends. So it + * is preferable to spend more time in matches to allow very fast string + * insertions and avoid deletions. The matching algorithm for small + * strings is inspired by that of Rabin & Karp. A brute force approach + * is used to find longer strings when a small match has been found. + * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze + * (by Leonid Broukhis). + * A previous version of this file used a more sophisticated algorithm + * (by Fiala and Greene) which is guaranteed to run in linear amortized + * time, but has a larger average cost, uses more memory and is patented. + * However the F&G algorithm may be faster for some highly redundant + * files if the parameter max_chain_length (described below) is too large. + * + * ACKNOWLEDGEMENTS + * + * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and + * I found it in 'freeze' written by Leonid Broukhis. + * Thanks to many people for bug reports and testing. + * + * REFERENCES + * + * Deutsch, L.P., "DEFLATE Compressed Data Format Specification". + * Available at http://tools.ietf.org/html/rfc1951 + * + * A description of the Rabin and Karp algorithm is given in the book + * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. + * + * Fiala, E.R., and Greene, D.H. + * Data Compression with Finite Windows, Comm. ACM, 32,4 (1989) 490-505 + * + */ + +/* @(#) $Id$ */ + +#include "deflate.h" + +const char deflate_copyright[] = + " deflate 1.3.1 Copyright 1995-2024 Jean-loup Gailly and Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product.
+ */ + +typedef enum { + need_more, /* block not completed, need more input or more output */ + block_done, /* block flush performed */ + finish_started, /* finish started, need only more output at next deflate */ + finish_done /* finish done, accept no more input or output */ +} block_state; + +typedef block_state (*compress_func)(deflate_state *s, int flush); +/* Compression function. Returns the block state after the call. */ + +local block_state deflate_stored(deflate_state *s, int flush); +local block_state deflate_fast(deflate_state *s, int flush); +#ifndef FASTEST +local block_state deflate_slow(deflate_state *s, int flush); +#endif +local block_state deflate_rle(deflate_state *s, int flush); +local block_state deflate_huff(deflate_state *s, int flush); + +/* =========================================================================== + * Local data + */ + +#define NIL 0 +/* Tail of hash chains */ + +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ +typedef struct config_s { + ush good_length; /* reduce lazy search above this match length */ + ush max_lazy; /* do not perform lazy search above this match length */ + ush nice_length; /* quit search above this match length */ + ush max_chain; + compress_func func; +} config; + +#ifdef FASTEST +local const config configuration_table[2] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ +#else +local const config configuration_table[10] = { +/* good lazy nice chain */ +/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ +/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ +/* 2 */ {4, 5, 16, 8, deflate_fast}, +/* 3 */ {4, 6, 32, 32, deflate_fast}, + +/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ +/* 5 */ {8, 16, 32, 32, deflate_slow}, +/* 6 */ {8, 16, 128, 128, deflate_slow}, +/* 7 */ {8, 32, 128, 256, deflate_slow}, +/* 8 */ {32, 128, 258, 1024, deflate_slow}, +/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ +#endif + +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ + +/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ +#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) + +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to UPDATE_HASH are made with consecutive input + * characters, so that a running hash key can be computed from the previous + * key instead of complete recalculation each time. + */ +#define UPDATE_HASH(s,h,c) (h = (((h) << s->hash_shift) ^ (c)) & s->hash_mask) + + +/* =========================================================================== + * Insert string str in the dictionary and set match_head to the previous head + * of the hash chain (the most recent string with same hash key). Return + * the previous length of the hash chain. + * If this file is compiled with -DFASTEST, the compression level is forced + * to 1, and no hash chains are maintained. 
+ * IN assertion: all calls to INSERT_STRING are made with consecutive input + * characters and the first MIN_MATCH bytes of str are valid (except for + * the last MIN_MATCH-1 bytes of the input file). + */ +#ifdef FASTEST +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#else +#define INSERT_STRING(s, str, match_head) \ + (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ + match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ + s->head[s->ins_h] = (Pos)(str)) +#endif + +/* =========================================================================== + * Initialize the hash table (avoiding 64K overflow for 16 bit systems). + * prev[] will be initialized on the fly. + */ +#define CLEAR_HASH(s) \ + do { \ + s->head[s->hash_size - 1] = NIL; \ + zmemzero((Bytef *)s->head, \ + (unsigned)(s->hash_size - 1)*sizeof(*s->head)); \ + } while (0) + +/* =========================================================================== + * Slide the hash table when sliding the window down (could be avoided with 32 + * bit values at the expense of memory usage). We slide even when level == 0 to + * keep the hash table consistent if we switch back to level > 0 later. + */ +#if defined(__has_feature) +# if __has_feature(memory_sanitizer) + __attribute__((no_sanitize("memory"))) +# endif +#endif +local void slide_hash(deflate_state *s) { + unsigned n, m; + Posf *p; + uInt wsize = s->w_size; + + n = s->hash_size; + p = &s->head[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + } while (--n); + n = wsize; +#ifndef FASTEST + p = &s->prev[n]; + do { + m = *--p; + *p = (Pos)(m >= wsize ? m - wsize : NIL); + /* If n is not on any hash chain, prev[n] is garbage but + * its value will never be used. + */ + } while (--n); +#endif +} + +/* =========================================================================== + * Read a new buffer from the current input stream, update the adler32 + * and total number of bytes read. All deflate() input goes through + * this function so some applications may wish to modify it to avoid + * allocating a large strm->next_in buffer and copying from it. + * (See also flush_pending()). + */ +local unsigned read_buf(z_streamp strm, Bytef *buf, unsigned size) { + unsigned len = strm->avail_in; + + if (len > size) len = size; + if (len == 0) return 0; + + strm->avail_in -= len; + + zmemcpy(buf, strm->next_in, len); + if (strm->state->wrap == 1) { + strm->adler = adler32(strm->adler, buf, len); + } +#ifdef GZIP + else if (strm->state->wrap == 2) { + strm->adler = crc32(strm->adler, buf, len); + } +#endif + strm->next_in += len; + strm->total_in += len; + + return len; +} + +/* =========================================================================== + * Fill the window when the lookahead becomes insufficient. + * Updates strstart and lookahead. + * + * IN assertion: lookahead < MIN_LOOKAHEAD + * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD + * At least one byte has been read, or avail_in == 0; reads are + * performed for at least two bytes (required for the zip translate_eol + * option -- not supported here). + */ +local void fill_window(deflate_state *s) { + unsigned n; + unsigned more; /* Amount of free space at the end of the window. 
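+ * On 16-bit builds the unsigned arithmetic computing this can wrap around + * to (unsigned)-1; the 64K-limit branch in the loop below allows for that.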
*/ + uInt wsize = s->w_size; + + Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); + + do { + more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); + + /* Deal with !@#$% 64K limit: */ + if (sizeof(int) <= 2) { + if (more == 0 && s->strstart == 0 && s->lookahead == 0) { + more = wsize; + + } else if (more == (unsigned)(-1)) { + /* Very unlikely, but possible on 16 bit machine if + * strstart == 0 && lookahead == 1 (input done a byte at a time) + */ + more--; + } + } + + /* If the window is almost full and there is insufficient lookahead, + * move the upper half to the lower one to make room in the upper half. + */ + if (s->strstart >= wsize + MAX_DIST(s)) { + + zmemcpy(s->window, s->window + wsize, (unsigned)wsize - more); + s->match_start -= wsize; + s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ + s->block_start -= (long) wsize; + if (s->insert > s->strstart) + s->insert = s->strstart; + slide_hash(s); + more += wsize; + } + if (s->strm->avail_in == 0) break; + + /* If there was no sliding: + * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && + * more == window_size - lookahead - strstart + * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) + * => more >= window_size - 2*WSIZE + 2 + * In the BIG_MEM or MMAP case (not yet supported), + * window_size == input_size + MIN_LOOKAHEAD && + * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. + * Otherwise, window_size == 2*WSIZE so more >= 2. + * If there was sliding, more >= WSIZE. So in all cases, more >= 2. + */ + Assert(more >= 2, "more < 2"); + + n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); + s->lookahead += n; + + /* Initialize the hash value now that we have some input: */ + if (s->lookahead + s->insert >= MIN_MATCH) { + uInt str = s->strstart - s->insert; + s->ins_h = s->window[str]; + UPDATE_HASH(s, s->ins_h, s->window[str + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + while (s->insert) { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + s->insert--; + if (s->lookahead + s->insert < MIN_MATCH) + break; + } + } + /* If the whole input has fewer than MIN_MATCH bytes, ins_h is garbage, + * but this is not important since only literal bytes will be emitted. + */ + + } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); + + /* If the WIN_INIT bytes after the end of the current data have never been + * written, then zero those bytes in order to avoid memory check reports of + * the use of uninitialized (or uninitialised as Julian writes) bytes by + * the longest match routines. Update the high water mark for the next + * time through here. WIN_INIT is set to MAX_MATCH since the longest match + * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. + */ + if (s->high_water < s->window_size) { + ulg curr = s->strstart + (ulg)(s->lookahead); + ulg init; + + if (s->high_water < curr) { + /* Previous high water mark below current data -- zero WIN_INIT + * bytes or up to end of window, whichever is less.
+ */ + init = s->window_size - curr; + if (init > WIN_INIT) + init = WIN_INIT; + zmemzero(s->window + curr, (unsigned)init); + s->high_water = curr + init; + } + else if (s->high_water < (ulg)curr + WIN_INIT) { + /* High water mark at or above current data, but below current data + * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up + * to end of window, whichever is less. + */ + init = (ulg)curr + WIN_INIT - s->high_water; + if (init > s->window_size - s->high_water) + init = s->window_size - s->high_water; + zmemzero(s->window + s->high_water, (unsigned)init); + s->high_water += init; + } + } + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "not enough room for search"); +} + +/* ========================================================================= */ +int ZEXPORT deflateInit_(z_streamp strm, int level, const char *version, + int stream_size) { + return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY, version, stream_size); + /* To do: ignore strm->next_in if we use it as window */ +} + +/* ========================================================================= */ +int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, + int windowBits, int memLevel, int strategy, + const char *version, int stream_size) { + deflate_state *s; + int wrap = 1; + static const char my_version[] = ZLIB_VERSION; + + if (version == Z_NULL || version[0] != my_version[0] || + stream_size != sizeof(z_stream)) { + return Z_VERSION_ERROR; + } + if (strm == Z_NULL) return Z_STREAM_ERROR; + + strm->msg = Z_NULL; + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + + if (windowBits < 0) { /* suppress zlib wrapper */ + wrap = 0; + if (windowBits < -15) + return Z_STREAM_ERROR; + windowBits = -windowBits; + } +#ifdef GZIP + else if (windowBits > 15) { + wrap = 2; /* write gzip wrapper instead */ + windowBits -= 16; + } +#endif + if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || + windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || + strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { + return Z_STREAM_ERROR; + } + if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ + s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); + if (s == Z_NULL) return Z_MEM_ERROR; + strm->state = (struct internal_state FAR *)s; + s->strm = strm; + s->status = INIT_STATE; /* to pass state test in deflateReset() */ + + s->wrap = wrap; + s->gzhead = Z_NULL; + s->w_bits = (uInt)windowBits; + s->w_size = 1 << s->w_bits; + s->w_mask = s->w_size - 1; + + s->hash_bits = (uInt)memLevel + 7; + s->hash_size = 1 << s->hash_bits; + s->hash_mask = s->hash_size - 1; + s->hash_shift = ((s->hash_bits + MIN_MATCH-1) / MIN_MATCH); + + s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); + s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); + s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); + + s->high_water = 0; /* nothing written to s->window yet */ + + s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ + + /* We overlay pending_buf and sym_buf. 
This works since the average size + * for length/distance pairs over any compressed block is assured to be 31 + * bits or less. + * + * Analysis: The longest fixed codes are a length code of 8 bits plus 5 + * extra bits, for lengths 131 to 257. The longest fixed distance codes are + * 5 bits plus 13 extra bits, for distances 16385 to 32768. The longest + * possible fixed-codes length/distance pair is then 31 bits total. + * + * sym_buf starts one-fourth of the way into pending_buf. So there are + * three bytes in sym_buf for every four bytes in pending_buf. Each symbol + * in sym_buf is three bytes -- two for the distance and one for the + * literal/length. As each symbol is consumed, the pointer to the next + * sym_buf value to read moves forward three bytes. From that symbol, up to + * 31 bits are written to pending_buf. The closest the written pending_buf + * bits get to the next sym_buf symbol to read is just before the last + * code is written. At that time, 31*(n - 2) bits have been written, just + * after 24*(n - 2) bits have been consumed from sym_buf. sym_buf starts at + * 8*n bits into pending_buf. (Note that the symbol buffer fills when n - 1 + * symbols are written.) The closest the writing gets to what is unread is + * then 8*n + 24*(n - 2) - 31*(n - 2) = n + 14 bits. Here n is lit_bufsize, + * which is 16384 by default, and can range from 128 to 32768. + * + * Therefore, at a minimum (n = 128), there are 142 bits of space between + * what is written and what is read in the overlain buffers, so the symbols + * cannot be overwritten by the compressed data. That space is actually 139 + * bits, due to the three-bit fixed-code block header. + * + * That covers the case where either Z_FIXED is specified, forcing fixed + * codes, or when the use of fixed codes is chosen, because that choice + * results in a smaller compressed block than dynamic codes. That latter + * condition then assures that the above analysis also covers all dynamic + * blocks. A dynamic-code block will only be chosen to be emitted if it has + * fewer bits than a fixed-code block would for the same set of symbols. + * Therefore its average symbol length is assured to be less than 31. So + * the compressed data for a dynamic block also cannot overwrite the + * symbols from which it is being constructed. + */ + + s->pending_buf = (uchf *) ZALLOC(strm, s->lit_bufsize, LIT_BUFS); + s->pending_buf_size = (ulg)s->lit_bufsize * 4; + + if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || + s->pending_buf == Z_NULL) { + s->status = FINISH_STATE; + strm->msg = ERR_MSG(Z_MEM_ERROR); + deflateEnd (strm); + return Z_MEM_ERROR; + } +#ifdef LIT_MEM + s->d_buf = (ushf *)(s->pending_buf + (s->lit_bufsize << 1)); + s->l_buf = s->pending_buf + (s->lit_bufsize << 2); + s->sym_end = s->lit_bufsize - 1; +#else + s->sym_buf = s->pending_buf + s->lit_bufsize; + s->sym_end = (s->lit_bufsize - 1) * 3; +#endif + /* We avoid equality with lit_bufsize*3 because of wraparound at 64K + * on 16 bit machines and because stored blocks are restricted to + * 64K-1 bytes. + */ + + s->level = level; + s->strategy = strategy; + s->method = (Byte)method; + + return deflateReset(strm); +} + +/* ========================================================================= + * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
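+ * (The exported functions below call this before touching any state, so a + * NULL or foreign strm pointer fails fast with Z_STREAM_ERROR.)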
+ */ +local int deflateStateCheck(z_streamp strm) { + deflate_state *s; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + s = strm->state; + if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && +#ifdef GZIP + s->status != GZIP_STATE && +#endif + s->status != EXTRA_STATE && + s->status != NAME_STATE && + s->status != COMMENT_STATE && + s->status != HCRC_STATE && + s->status != BUSY_STATE && + s->status != FINISH_STATE)) + return 1; + return 0; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetDictionary(z_streamp strm, const Bytef *dictionary, + uInt dictLength) { + deflate_state *s; + uInt str, n; + int wrap; + unsigned avail; + z_const unsigned char *next; + + if (deflateStateCheck(strm) || dictionary == Z_NULL) + return Z_STREAM_ERROR; + s = strm->state; + wrap = s->wrap; + if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) + return Z_STREAM_ERROR; + + /* when using zlib wrappers, compute Adler-32 for provided dictionary */ + if (wrap == 1) + strm->adler = adler32(strm->adler, dictionary, dictLength); + s->wrap = 0; /* avoid computing Adler-32 in read_buf */ + + /* if dictionary would fill window, just replace the history */ + if (dictLength >= s->w_size) { + if (wrap == 0) { /* already empty otherwise */ + CLEAR_HASH(s); + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + dictionary += dictLength - s->w_size; /* use the tail */ + dictLength = s->w_size; + } + + /* insert dictionary into window and hash */ + avail = strm->avail_in; + next = strm->next_in; + strm->avail_in = dictLength; + strm->next_in = (z_const Bytef *)dictionary; + fill_window(s); + while (s->lookahead >= MIN_MATCH) { + str = s->strstart; + n = s->lookahead - (MIN_MATCH-1); + do { + UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); +#ifndef FASTEST + s->prev[str & s->w_mask] = s->head[s->ins_h]; +#endif + s->head[s->ins_h] = (Pos)str; + str++; + } while (--n); + s->strstart = str; + s->lookahead = MIN_MATCH-1; + fill_window(s); + } + s->strstart += s->lookahead; + s->block_start = (long)s->strstart; + s->insert = s->lookahead; + s->lookahead = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + strm->next_in = next; + strm->avail_in = avail; + s->wrap = wrap; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateGetDictionary(z_streamp strm, Bytef *dictionary, + uInt *dictLength) { + deflate_state *s; + uInt len; + + if (deflateStateCheck(strm)) + return Z_STREAM_ERROR; + s = strm->state; + len = s->strstart + s->lookahead; + if (len > s->w_size) + len = s->w_size; + if (dictionary != Z_NULL && len) + zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); + if (dictLength != Z_NULL) + *dictLength = len; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateResetKeep(z_streamp strm) { + deflate_state *s; + + if (deflateStateCheck(strm)) { + return Z_STREAM_ERROR; + } + + strm->total_in = strm->total_out = 0; + strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ + strm->data_type = Z_UNKNOWN; + + s = (deflate_state *)strm->state; + s->pending = 0; + s->pending_out = s->pending_buf; + + if (s->wrap < 0) { + s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */ + } + s->status = +#ifdef GZIP + s->wrap == 2 ? 
GZIP_STATE : +#endif + INIT_STATE; + strm->adler = +#ifdef GZIP + s->wrap == 2 ? crc32(0L, Z_NULL, 0) : +#endif + adler32(0L, Z_NULL, 0); + s->last_flush = -2; + + _tr_init(s); + + return Z_OK; +} + +/* =========================================================================== + * Initialize the "longest match" routines for a new zlib stream + */ +local void lm_init(deflate_state *s) { + s->window_size = (ulg)2L*s->w_size; + + CLEAR_HASH(s); + + /* Set the default configuration parameters: + */ + s->max_lazy_match = configuration_table[s->level].max_lazy; + s->good_match = configuration_table[s->level].good_length; + s->nice_match = configuration_table[s->level].nice_length; + s->max_chain_length = configuration_table[s->level].max_chain; + + s->strstart = 0; + s->block_start = 0L; + s->lookahead = 0; + s->insert = 0; + s->match_length = s->prev_length = MIN_MATCH-1; + s->match_available = 0; + s->ins_h = 0; +} + +/* ========================================================================= */ +int ZEXPORT deflateReset(z_streamp strm) { + int ret; + + ret = deflateResetKeep(strm); + if (ret == Z_OK) + lm_init(strm->state); + return ret; +} + +/* ========================================================================= */ +int ZEXPORT deflateSetHeader(z_streamp strm, gz_headerp head) { + if (deflateStateCheck(strm) || strm->state->wrap != 2) + return Z_STREAM_ERROR; + strm->state->gzhead = head; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePending(z_streamp strm, unsigned *pending, int *bits) { + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + if (pending != Z_NULL) + *pending = strm->state->pending; + if (bits != Z_NULL) + *bits = strm->state->bi_valid; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflatePrime(z_streamp strm, int bits, int value) { + deflate_state *s; + int put; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; +#ifdef LIT_MEM + if (bits < 0 || bits > 16 || + (uchf *)s->d_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; +#else + if (bits < 0 || bits > 16 || + s->sym_buf < s->pending_out + ((Buf_size + 7) >> 3)) + return Z_BUF_ERROR; +#endif + do { + put = Buf_size - s->bi_valid; + if (put > bits) + put = bits; + s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); + s->bi_valid += put; + _tr_flush_bits(s); + value >>= put; + bits -= put; + } while (bits); + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateParams(z_streamp strm, int level, int strategy) { + deflate_state *s; + compress_func func; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + +#ifdef FASTEST + if (level != 0) level = 1; +#else + if (level == Z_DEFAULT_COMPRESSION) level = 6; +#endif + if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { + return Z_STREAM_ERROR; + } + func = configuration_table[s->level].func; + + if ((strategy != s->strategy || func != configuration_table[level].func) && + s->last_flush != -2) { + /* Flush the last buffer: */ + int err = deflate(strm, Z_BLOCK); + if (err == Z_STREAM_ERROR) + return err; + if (strm->avail_in || (s->strstart - s->block_start) + s->lookahead) + return Z_BUF_ERROR; + } + if (s->level != level) { + if (s->level == 0 && s->matches != 0) { + if (s->matches == 1) + slide_hash(s); + else + CLEAR_HASH(s); + s->matches = 0; + } + s->level = level; + 
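        /* Pick up the tuning parameters prescribed for the new level. */ +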
s->max_lazy_match = configuration_table[level].max_lazy; + s->good_match = configuration_table[level].good_length; + s->nice_match = configuration_table[level].nice_length; + s->max_chain_length = configuration_table[level].max_chain; + } + s->strategy = strategy; + return Z_OK; +} + +/* ========================================================================= */ +int ZEXPORT deflateTune(z_streamp strm, int good_length, int max_lazy, + int nice_length, int max_chain) { + deflate_state *s; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + s = strm->state; + s->good_match = (uInt)good_length; + s->max_lazy_match = (uInt)max_lazy; + s->nice_match = nice_length; + s->max_chain_length = (uInt)max_chain; + return Z_OK; +} + +/* ========================================================================= + * For the default windowBits of 15 and memLevel of 8, this function returns a + * close to exact, as well as small, upper bound on the compressed size. This + * is an expansion of ~0.03%, plus a small constant. + * + * For any setting other than those defaults for windowBits and memLevel, one + * of two worst case bounds is returned. This is at most an expansion of ~4% or + * ~13%, plus a small constant. + * + * Both the 0.03% and 4% derive from the overhead of stored blocks. The first + * one is for stored blocks of 16383 bytes (memLevel == 8), whereas the second + * is for stored blocks of 127 bytes (the worst case memLevel == 1). The + * expansion results from five bytes of header for each stored block. + * + * The larger expansion of 13% results from a window size less than or equal to + * the symbols buffer size (windowBits <= memLevel + 7). In that case some of + * the data being compressed may have slid out of the sliding window, impeding + * a stored block from being emitted. Then the only choice is a fixed or + * dynamic block, where a fixed block limits the maximum expansion to 9 bits + * per 8-bit byte, plus 10 bits for every block. The smallest block size for + * which this can occur is 255 (memLevel == 2). + * + * Shifts are used to approximate divisions, for speed. + */ +uLong ZEXPORT deflateBound(z_streamp strm, uLong sourceLen) { + deflate_state *s; + uLong fixedlen, storelen, wraplen; + + /* upper bound for fixed blocks with 9-bit literals and length 255 + (memLevel == 2, which is the lowest that may not use stored blocks) -- + ~13% overhead plus a small constant */ + fixedlen = sourceLen + (sourceLen >> 3) + (sourceLen >> 8) + + (sourceLen >> 9) + 4; + + /* upper bound for stored blocks with length 127 (memLevel == 1) -- + ~4% overhead plus a small constant */ + storelen = sourceLen + (sourceLen >> 5) + (sourceLen >> 7) + + (sourceLen >> 11) + 7; + + /* if can't get parameters, return larger bound plus a zlib wrapper */ + if (deflateStateCheck(strm)) + return (fixedlen > storelen ? fixedlen : storelen) + 6; + + /* compute wrapper length */ + s = strm->state; + switch (s->wrap) { + case 0: /* raw deflate */ + wraplen = 0; + break; + case 1: /* zlib wrapper */ + wraplen = 6 + (s->strstart ? 
4 : 0); + break; +#ifdef GZIP + case 2: /* gzip wrapper */ + wraplen = 18; + if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ + Bytef *str; + if (s->gzhead->extra != Z_NULL) + wraplen += 2 + s->gzhead->extra_len; + str = s->gzhead->name; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + str = s->gzhead->comment; + if (str != Z_NULL) + do { + wraplen++; + } while (*str++); + if (s->gzhead->hcrc) + wraplen += 2; + } + break; +#endif + default: /* for compiler happiness */ + wraplen = 6; + } + + /* if not default parameters, return one of the conservative bounds */ + if (s->w_bits != 15 || s->hash_bits != 8 + 7) + return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) + + wraplen; + + /* default settings: return tight bound for that case -- ~0.03% overhead + plus a small constant */ + return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + + (sourceLen >> 25) + 13 - 6 + wraplen; +} + +/* ========================================================================= + * Put a short in the pending buffer. The 16-bit value is put in MSB order. + * IN assertion: the stream state is correct and there is enough room in + * pending_buf. + */ +local void putShortMSB(deflate_state *s, uInt b) { + put_byte(s, (Byte)(b >> 8)); + put_byte(s, (Byte)(b & 0xff)); +} + +/* ========================================================================= + * Flush as much pending output as possible. All deflate() output, except for + * some deflate_stored() output, goes through this function so some + * applications may wish to modify it to avoid allocating a large + * strm->next_out buffer and copying into it. (See also read_buf()). + */ +local void flush_pending(z_streamp strm) { + unsigned len; + deflate_state *s = strm->state; + + _tr_flush_bits(s); + len = s->pending; + if (len > strm->avail_out) len = strm->avail_out; + if (len == 0) return; + + zmemcpy(strm->next_out, s->pending_out, len); + strm->next_out += len; + s->pending_out += len; + strm->total_out += len; + strm->avail_out -= len; + s->pending -= len; + if (s->pending == 0) { + s->pending_out = s->pending_buf; + } +} + +/* =========================================================================== + * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. + */ +#define HCRC_UPDATE(beg) \ + do { \ + if (s->gzhead->hcrc && s->pending > (beg)) \ + strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ + s->pending - (beg)); \ + } while (0) + +/* ========================================================================= */ +int ZEXPORT deflate(z_streamp strm, int flush) { + int old_flush; /* value of flush param for previous deflate call */ + deflate_state *s; + + if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { + return Z_STREAM_ERROR; + } + s = strm->state; + + if (strm->next_out == Z_NULL || + (strm->avail_in != 0 && strm->next_in == Z_NULL) || + (s->status == FINISH_STATE && flush != Z_FINISH)) { + ERR_RETURN(strm, Z_STREAM_ERROR); + } + if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); + + old_flush = s->last_flush; + s->last_flush = flush; + + /* Flush as much pending output as possible */ + if (s->pending != 0) { + flush_pending(strm); + if (strm->avail_out == 0) { + /* Since avail_out is 0, deflate will be called again with + * more output space, but possibly with both pending and + * avail_in equal to zero. 
There won't be anything to do, + * but this is not an error situation so make sure we + * return OK instead of BUF_ERROR at next call of deflate: + */ + s->last_flush = -1; + return Z_OK; + } + + /* Make sure there is something to do and avoid duplicate consecutive + * flushes. For repeated and useless calls with Z_FINISH, we keep + * returning Z_STREAM_END instead of Z_BUF_ERROR. + */ + } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && + flush != Z_FINISH) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* User must not provide more input after the first FINISH: */ + if (s->status == FINISH_STATE && strm->avail_in != 0) { + ERR_RETURN(strm, Z_BUF_ERROR); + } + + /* Write the header */ + if (s->status == INIT_STATE && s->wrap == 0) + s->status = BUSY_STATE; + if (s->status == INIT_STATE) { + /* zlib header */ + uInt header = (Z_DEFLATED + ((s->w_bits - 8) << 4)) << 8; + uInt level_flags; + + if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) + level_flags = 0; + else if (s->level < 6) + level_flags = 1; + else if (s->level == 6) + level_flags = 2; + else + level_flags = 3; + header |= (level_flags << 6); + if (s->strstart != 0) header |= PRESET_DICT; + header += 31 - (header % 31); + + putShortMSB(s, header); + + /* Save the adler32 of the preset dictionary: */ + if (s->strstart != 0) { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + strm->adler = adler32(0L, Z_NULL, 0); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#ifdef GZIP + if (s->status == GZIP_STATE) { + /* gzip header */ + strm->adler = crc32(0L, Z_NULL, 0); + put_byte(s, 31); + put_byte(s, 139); + put_byte(s, 8); + if (s->gzhead == Z_NULL) { + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, 0); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? + 4 : 0)); + put_byte(s, OS_CODE); + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + else { + put_byte(s, (s->gzhead->text ? 1 : 0) + + (s->gzhead->hcrc ? 2 : 0) + + (s->gzhead->extra == Z_NULL ? 0 : 4) + + (s->gzhead->name == Z_NULL ? 0 : 8) + + (s->gzhead->comment == Z_NULL ? 0 : 16) + ); + put_byte(s, (Byte)(s->gzhead->time & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); + put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); + put_byte(s, s->level == 9 ? 2 : + (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? 
+ 4 : 0)); + put_byte(s, s->gzhead->os & 0xff); + if (s->gzhead->extra != Z_NULL) { + put_byte(s, s->gzhead->extra_len & 0xff); + put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); + } + if (s->gzhead->hcrc) + strm->adler = crc32(strm->adler, s->pending_buf, + s->pending); + s->gzindex = 0; + s->status = EXTRA_STATE; + } + } + if (s->status == EXTRA_STATE) { + if (s->gzhead->extra != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; + while (s->pending + left > s->pending_buf_size) { + uInt copy = s->pending_buf_size - s->pending; + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, copy); + s->pending = s->pending_buf_size; + HCRC_UPDATE(beg); + s->gzindex += copy; + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + left -= copy; + } + zmemcpy(s->pending_buf + s->pending, + s->gzhead->extra + s->gzindex, left); + s->pending += left; + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = NAME_STATE; + } + if (s->status == NAME_STATE) { + if (s->gzhead->name != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->name[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + s->gzindex = 0; + } + s->status = COMMENT_STATE; + } + if (s->status == COMMENT_STATE) { + if (s->gzhead->comment != Z_NULL) { + ulg beg = s->pending; /* start of bytes to update crc */ + int val; + do { + if (s->pending == s->pending_buf_size) { + HCRC_UPDATE(beg); + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + beg = 0; + } + val = s->gzhead->comment[s->gzindex++]; + put_byte(s, val); + } while (val != 0); + HCRC_UPDATE(beg); + } + s->status = HCRC_STATE; + } + if (s->status == HCRC_STATE) { + if (s->gzhead->hcrc) { + if (s->pending + 2 > s->pending_buf_size) { + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + strm->adler = crc32(0L, Z_NULL, 0); + } + s->status = BUSY_STATE; + + /* Compression must start with an empty pending buffer */ + flush_pending(strm); + if (s->pending != 0) { + s->last_flush = -1; + return Z_OK; + } + } +#endif + + /* Start a new block or continue the current one. + */ + if (strm->avail_in != 0 || s->lookahead != 0 || + (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { + block_state bstate; + + bstate = s->level == 0 ? deflate_stored(s, flush) : + s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : + s->strategy == Z_RLE ? deflate_rle(s, flush) : + (*(configuration_table[s->level].func))(s, flush); + + if (bstate == finish_started || bstate == finish_done) { + s->status = FINISH_STATE; + } + if (bstate == need_more || bstate == finish_started) { + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ + } + return Z_OK; + /* If flush != Z_NO_FLUSH && avail_out == 0, the next call + * of deflate should use the same flush parameter to make sure + * that the flush is complete. So we don't have to output an + * empty block here, this will be done at next call. This also + * ensures that for a very small output buffer, we emit at most + * one empty block. 
+ */ + } + if (bstate == block_done) { + if (flush == Z_PARTIAL_FLUSH) { + _tr_align(s); + } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ + _tr_stored_block(s, (char*)0, 0L, 0); + /* For a full flush, this empty block will be recognized + * as a special marker by inflate_sync(). + */ + if (flush == Z_FULL_FLUSH) { + CLEAR_HASH(s); /* forget history */ + if (s->lookahead == 0) { + s->strstart = 0; + s->block_start = 0L; + s->insert = 0; + } + } + } + flush_pending(strm); + if (strm->avail_out == 0) { + s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ + return Z_OK; + } + } + } + + if (flush != Z_FINISH) return Z_OK; + if (s->wrap <= 0) return Z_STREAM_END; + + /* Write the trailer */ +#ifdef GZIP + if (s->wrap == 2) { + put_byte(s, (Byte)(strm->adler & 0xff)); + put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); + put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); + put_byte(s, (Byte)(strm->total_in & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); + put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); + } + else +#endif + { + putShortMSB(s, (uInt)(strm->adler >> 16)); + putShortMSB(s, (uInt)(strm->adler & 0xffff)); + } + flush_pending(strm); + /* If avail_out is zero, the application will call deflate again + * to flush the rest. + */ + if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ + return s->pending != 0 ? Z_OK : Z_STREAM_END; +} + +/* ========================================================================= */ +int ZEXPORT deflateEnd(z_streamp strm) { + int status; + + if (deflateStateCheck(strm)) return Z_STREAM_ERROR; + + status = strm->state->status; + + /* Deallocate in reverse order of allocations: */ + TRY_FREE(strm, strm->state->pending_buf); + TRY_FREE(strm, strm->state->head); + TRY_FREE(strm, strm->state->prev); + TRY_FREE(strm, strm->state->window); + + ZFREE(strm, strm->state); + strm->state = Z_NULL; + + return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; +} + +/* ========================================================================= + * Copy the source state to the destination state. + * To simplify the source, this is not supported for 16-bit MSDOS (which + * doesn't have enough memory anyway to duplicate compression states). 
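+ * + * A minimal usage sketch (illustrative only -- stream setup, output buffers + * and error handling omitted; the names snap and strm are placeholders): + * snapshot a live stream so a trial continuation can be finished without + * disturbing the original: + * + *     z_stream snap; + *     if (deflateCopy(&snap, &strm) == Z_OK) { + *         ... run deflate(&snap, Z_FINISH) into a scratch buffer ... + *         deflateEnd(&snap); + *     }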
*/ +int ZEXPORT deflateCopy(z_streamp dest, z_streamp source) { +#ifdef MAXSEG_64K + (void)dest; + (void)source; + return Z_STREAM_ERROR; +#else + deflate_state *ds; + deflate_state *ss; + + + if (deflateStateCheck(source) || dest == Z_NULL) { + return Z_STREAM_ERROR; + } + + ss = source->state; + + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + + ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); + if (ds == Z_NULL) return Z_MEM_ERROR; + dest->state = (struct internal_state FAR *) ds; + zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); + ds->strm = dest; + + ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); + ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); + ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); + ds->pending_buf = (uchf *) ZALLOC(dest, ds->lit_bufsize, LIT_BUFS); + + if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || + ds->pending_buf == Z_NULL) { + deflateEnd (dest); + return Z_MEM_ERROR; + } + /* the following zmemcpy calls do not work for 16-bit MSDOS */ + zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); + zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); + zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); + zmemcpy(ds->pending_buf, ss->pending_buf, ds->lit_bufsize * LIT_BUFS); + + ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); +#ifdef LIT_MEM + ds->d_buf = (ushf *)(ds->pending_buf + (ds->lit_bufsize << 1)); + ds->l_buf = ds->pending_buf + (ds->lit_bufsize << 2); +#else + ds->sym_buf = ds->pending_buf + ds->lit_bufsize; +#endif + + ds->l_desc.dyn_tree = ds->dyn_ltree; + ds->d_desc.dyn_tree = ds->dyn_dtree; + ds->bl_desc.dyn_tree = ds->bl_tree; + + return Z_OK; +#endif /* MAXSEG_64K */ +} + +#ifndef FASTEST +/* =========================================================================== + * Set match_start to the longest match starting at the given string and + * return its length. Matches shorter than or equal to prev_length are + * discarded, in which case the result is equal to prev_length and + * match_start is garbage. + * IN assertions: cur_match is the head of the hash chain for the current + * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 + * OUT assertion: the match length is not greater than s->lookahead. + */ +local uInt longest_match(deflate_state *s, IPos cur_match) { + unsigned chain_length = s->max_chain_length;/* max hash chain length */ + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + int best_len = (int)s->prev_length; /* best match length so far */ + int nice_match = s->nice_match; /* stop if match long enough */ + IPos limit = s->strstart > (IPos)MAX_DIST(s) ? + s->strstart - (IPos)MAX_DIST(s) : NIL; + /* Stop when cur_match becomes <= limit. To simplify the code, + * we prevent matches with the string of window index 0. + */ + Posf *prev = s->prev; + uInt wmask = s->w_mask; + +#ifdef UNALIGNED_OK + /* Compare two bytes at a time. Note: this is not always beneficial. + * Try with and without -DUNALIGNED_OK to check.
+ */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; + register ush scan_start = *(ushf*)scan; + register ush scan_end = *(ushf*)(scan + best_len - 1); +#else + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + register Byte scan_end1 = scan[best_len - 1]; + register Byte scan_end = scan[best_len]; +#endif + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + /* Do not waste too much time if we already have a good match: */ + if (s->prev_length >= s->good_match) { + chain_length >>= 2; + } + /* Do not look for matches beyond the end of the input. This is necessary + * to make deflate deterministic. + */ + if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "need lookahead"); + + do { + Assert(cur_match < s->strstart, "no future"); + match = s->window + cur_match; + + /* Skip to next match if the match length cannot increase + * or if the match length is less than 2. Note that the checks below + * for insufficient lookahead only occur occasionally for performance + * reasons. Therefore uninitialized memory will be accessed, and + * conditional jumps will be made that depend on those values. + * However the length of the match is limited to the lookahead, so + * the output of deflate is not affected by the uninitialized values. + */ +#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) + /* This code assumes sizeof(unsigned short) == 2. Do not use + * UNALIGNED_OK if your compiler uses a different size. + */ + if (*(ushf*)(match + best_len - 1) != scan_end || + *(ushf*)match != scan_start) continue; + + /* It is not necessary to compare scan[2] and match[2] since they are + * always equal when the other bytes match, given that the hash keys + * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at + * strstart + 3, + 5, up to strstart + 257. We check for insufficient + * lookahead only every 4th comparison; the 128th check will be made + * at strstart + 257. If MAX_MATCH-2 is not a multiple of 8, it is + * necessary to put more guard bytes at the end of the window, or + * to check more often for insufficient lookahead. + */ + Assert(scan[2] == match[2], "scan[2]?"); + scan++, match++; + do { + } while (*(ushf*)(scan += 2) == *(ushf*)(match += 2) && + *(ushf*)(scan += 2) == *(ushf*)(match += 2) && + *(ushf*)(scan += 2) == *(ushf*)(match += 2) && + *(ushf*)(scan += 2) == *(ushf*)(match += 2) && + scan < strend); + /* The funny "do {}" generates better code on most compilers */ + + /* Here, scan <= window + strstart + 257 */ + Assert(scan <= s->window + (unsigned)(s->window_size - 1), + "wild scan"); + if (*scan == *match) scan++; + + len = (MAX_MATCH - 1) - (int)(strend - scan); + scan = strend - (MAX_MATCH-1); + +#else /* UNALIGNED_OK */ + + if (match[best_len] != scan_end || + match[best_len - 1] != scan_end1 || + *match != *scan || + *++match != scan[1]) continue; + + /* The check at best_len - 1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. 
+ */ + scan += 2, match++; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart + 258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window + (unsigned)(s->window_size - 1), + "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + scan = strend - MAX_MATCH; + +#endif /* UNALIGNED_OK */ + + if (len > best_len) { + s->match_start = cur_match; + best_len = len; + if (len >= nice_match) break; +#ifdef UNALIGNED_OK + scan_end = *(ushf*)(scan + best_len - 1); +#else + scan_end1 = scan[best_len - 1]; + scan_end = scan[best_len]; +#endif + } + } while ((cur_match = prev[cur_match & wmask]) > limit + && --chain_length != 0); + + if ((uInt)best_len <= s->lookahead) return (uInt)best_len; + return s->lookahead; +} + +#else /* FASTEST */ + +/* --------------------------------------------------------------------------- + * Optimized version for FASTEST only + */ +local uInt longest_match(deflate_state *s, IPos cur_match) { + register Bytef *scan = s->window + s->strstart; /* current string */ + register Bytef *match; /* matched string */ + register int len; /* length of current match */ + register Bytef *strend = s->window + s->strstart + MAX_MATCH; + + /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. + * It is easy to get rid of this optimization if necessary. + */ + Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); + + Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, + "need lookahead"); + + Assert(cur_match < s->strstart, "no future"); + + match = s->window + cur_match; + + /* Return failure if the match length is less than 2: + */ + if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; + + /* The check at best_len - 1 can be removed because it will be made + * again later. (This heuristic is not always a win.) + * It is not necessary to compare scan[2] and match[2] since they + * are always equal when the other bytes match, given that + * the hash keys are equal and that HASH_BITS >= 8. + */ + scan += 2, match += 2; + Assert(*scan == *match, "match[2]?"); + + /* We check for insufficient lookahead only every 8th comparison; + * the 256th check will be made at strstart + 258. + */ + do { + } while (*++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + *++scan == *++match && *++scan == *++match && + scan < strend); + + Assert(scan <= s->window + (unsigned)(s->window_size - 1), "wild scan"); + + len = MAX_MATCH - (int)(strend - scan); + + if (len < MIN_MATCH) return MIN_MATCH - 1; + + s->match_start = cur_match; + return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; +} + +#endif /* FASTEST */ + +#ifdef ZLIB_DEBUG + +#define EQUAL 0 +/* result of memcmp for equal strings */ + +/* =========================================================================== + * Check that the match at match_start is indeed a match. 
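+ * (Compiled only when ZLIB_DEBUG is defined; the #else branch below defines + * check_match away to a no-op in release builds.)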
+ */ +local void check_match(deflate_state *s, IPos start, IPos match, int length) { + /* check that the match is indeed a match */ + Bytef *back = s->window + (int)match, *here = s->window + start; + IPos len = length; + if (match == (IPos)-1) { + /* match starts one byte before the current window -- just compare the + subsequent length-1 bytes */ + back++; + here++; + len--; + } + if (zmemcmp(back, here, len) != EQUAL) { + fprintf(stderr, " start %u, match %d, length %d\n", + start, (int)match, length); + do { + fprintf(stderr, "(%02x %02x)", *back++, *here++); + } while (--len != 0); + z_error("invalid match"); + } + if (z_verbose > 1) { + fprintf(stderr,"\\[%d,%d]", start - match, length); + do { putc(s->window[start++], stderr); } while (--length != 0); + } +} +#else +# define check_match(s, start, match, length) +#endif /* ZLIB_DEBUG */ + +/* =========================================================================== + * Flush the current block, with given end-of-file flag. + * IN assertion: strstart is set to the end of the current match. + */ +#define FLUSH_BLOCK_ONLY(s, last) { \ + _tr_flush_block(s, (s->block_start >= 0L ? \ + (charf *)&s->window[(unsigned)s->block_start] : \ + (charf *)Z_NULL), \ + (ulg)((long)s->strstart - s->block_start), \ + (last)); \ + s->block_start = s->strstart; \ + flush_pending(s->strm); \ + Tracev((stderr,"[FLUSH]")); \ +} + +/* Same but force premature exit if necessary. */ +#define FLUSH_BLOCK(s, last) { \ + FLUSH_BLOCK_ONLY(s, last); \ + if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ +} + +/* Maximum stored block length in deflate format (not including header). */ +#define MAX_STORED 65535 + +/* Minimum of a and b. */ +#define MIN(a, b) ((a) > (b) ? (b) : (a)) + +/* =========================================================================== + * Copy without compression as much as possible from the input stream, return + * the current block state. + * + * In case deflateParams() is used to later switch to a non-zero compression + * level, s->matches (otherwise unused when storing) keeps track of the number + * of hash table slides to perform. If s->matches is 1, then one hash table + * slide will be done when switching. If s->matches is 2, the maximum value + * allowed here, then the hash table will be cleared, since two or more slides + * is the same as a clear. + * + * deflate_stored() is written to minimize the number of times an input byte is + * copied. It is most efficient with large input and output buffers, which + * maximizes the opportunities to have a single copy from next_in to next_out. + */ +local block_state deflate_stored(deflate_state *s, int flush) { + /* Smallest worthy block size when not flushing or finishing. By default + * this is 32K. This can be as small as 507 bytes for memLevel == 1. For + * large input and output buffers, the stored block size will be larger. + */ + unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); + + /* Copy as many min_block or larger stored blocks directly to next_out as + * possible. If flushing, copy the remaining available input to next_out as + * stored blocks, if there is enough space. + */ + unsigned len, left, have, last = 0; + unsigned used = s->strm->avail_in; + do { + /* Set len to the maximum size block that we can copy directly with the + * available input data and output space. Set left to how much of that + * would be copied from what's left in the window. 
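+         * For instance, with no pending bits (s->bi_valid == 0) the header
+         * needs (0 + 42) >> 3 == 5 bytes: the three-bit stored-block header
+         * rounded up to a byte boundary plus the four-byte LEN/NLEN pair.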
+ */ + len = MAX_STORED; /* maximum deflate stored block length */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + if (s->strm->avail_out < have) /* need room for header */ + break; + /* maximum stored block length that will fit in avail_out: */ + have = s->strm->avail_out - have; + left = s->strstart - s->block_start; /* bytes left in window */ + if (len > (ulg)left + s->strm->avail_in) + len = left + s->strm->avail_in; /* limit len to the input */ + if (len > have) + len = have; /* limit len to the output */ + + /* If the stored block would be less than min_block in length, or if + * unable to copy all of the available input when flushing, then try + * copying to the window and the pending buffer instead. Also don't + * write an empty block when flushing -- deflate() does that. + */ + if (len < min_block && ((len == 0 && flush != Z_FINISH) || + flush == Z_NO_FLUSH || + len != left + s->strm->avail_in)) + break; + + /* Make a dummy stored block in pending to get the header bytes, + * including any pending bits. This also updates the debugging counts. + */ + last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; + _tr_stored_block(s, (char *)0, 0L, last); + + /* Replace the lengths in the dummy stored block with len. */ + s->pending_buf[s->pending - 4] = len; + s->pending_buf[s->pending - 3] = len >> 8; + s->pending_buf[s->pending - 2] = ~len; + s->pending_buf[s->pending - 1] = ~len >> 8; + + /* Write the stored block header bytes. */ + flush_pending(s->strm); + +#ifdef ZLIB_DEBUG + /* Update debugging counts for the data about to be copied. */ + s->compressed_len += len << 3; + s->bits_sent += len << 3; +#endif + + /* Copy uncompressed bytes from the window to next_out. */ + if (left) { + if (left > len) + left = len; + zmemcpy(s->strm->next_out, s->window + s->block_start, left); + s->strm->next_out += left; + s->strm->avail_out -= left; + s->strm->total_out += left; + s->block_start += left; + len -= left; + } + + /* Copy uncompressed bytes directly from next_in to next_out, updating + * the check value. + */ + if (len) { + read_buf(s->strm, s->strm->next_out, len); + s->strm->next_out += len; + s->strm->avail_out -= len; + s->strm->total_out += len; + } + } while (last == 0); + + /* Update the sliding window with the last s->w_size bytes of the copied + * data, or append all of the copied data to the existing window if less + * than s->w_size bytes were copied. Also update the number of bytes to + * insert in the hash tables, in the event that deflateParams() switches to + * a non-zero compression level. + */ + used -= s->strm->avail_in; /* number of input bytes directly copied */ + if (used) { + /* If any input was used, then no unused input remains in the window, + * therefore s->block_start == s->strstart. + */ + if (used >= s->w_size) { /* supplant the previous history */ + s->matches = 2; /* clear hash */ + zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); + s->strstart = s->w_size; + s->insert = s->strstart; + } + else { + if (s->window_size - s->strstart <= used) { + /* Slide the window down. 
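+                 * (Drop the oldest w_size bytes of history and keep the
+                 * rest, making room to append the input just consumed.)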
*/ + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + if (s->insert > s->strstart) + s->insert = s->strstart; + } + zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); + s->strstart += used; + s->insert += MIN(used, s->w_size - s->insert); + } + s->block_start = s->strstart; + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* If the last block was written to next_out, then done. */ + if (last) + return finish_done; + + /* If flushing and all input has been consumed, then done. */ + if (flush != Z_NO_FLUSH && flush != Z_FINISH && + s->strm->avail_in == 0 && (long)s->strstart == s->block_start) + return block_done; + + /* Fill the window with any remaining input. */ + have = s->window_size - s->strstart; + if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { + /* Slide the window down. */ + s->block_start -= s->w_size; + s->strstart -= s->w_size; + zmemcpy(s->window, s->window + s->w_size, s->strstart); + if (s->matches < 2) + s->matches++; /* add a pending slide_hash() */ + have += s->w_size; /* more space now */ + if (s->insert > s->strstart) + s->insert = s->strstart; + } + if (have > s->strm->avail_in) + have = s->strm->avail_in; + if (have) { + read_buf(s->strm, s->window + s->strstart, have); + s->strstart += have; + s->insert += MIN(have, s->w_size - s->insert); + } + if (s->high_water < s->strstart) + s->high_water = s->strstart; + + /* There was not enough avail_out to write a complete worthy or flushed + * stored block to next_out. Write a stored block to pending instead, if we + * have enough input for a worthy block, or if flushing and there is enough + * room for the remaining input as a stored block in the pending buffer. + */ + have = (s->bi_valid + 42) >> 3; /* number of header bytes */ + /* maximum stored block length that will fit in pending: */ + have = MIN(s->pending_buf_size - have, MAX_STORED); + min_block = MIN(have, s->w_size); + left = s->strstart - s->block_start; + if (left >= min_block || + ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && + s->strm->avail_in == 0 && left <= have)) { + len = MIN(left, have); + last = flush == Z_FINISH && s->strm->avail_in == 0 && + len == left ? 1 : 0; + _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); + s->block_start += len; + flush_pending(s->strm); + } + + /* We've done all we can with the available input and output. */ + return last ? finish_started : need_more; +} + +/* =========================================================================== + * Compress as much as possible from the input stream, return the current + * block state. + * This function does not perform lazy evaluation of matches and inserts + * new strings in the dictionary only for unmatched strings or for short + * matches. It is used only for the fast compression options. + */ +local block_state deflate_fast(deflate_state *s, int flush) { + IPos hash_head; /* head of the hash chain */ + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. 
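+     * (MIN_LOOKAHEAD is MAX_MATCH + MIN_MATCH + 1 == 262 bytes; see
+     * deflate.h.)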
+ */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. strstart + 2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + * At this point we have always match_length < MIN_MATCH + */ + if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + } + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->match_start, s->match_length); + + _tr_tally_dist(s, s->strstart - s->match_start, + s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + + /* Insert new strings in the hash table only if the match length + * is not too large. This saves time but degrades compression. + */ +#ifndef FASTEST + if (s->match_length <= s->max_insert_length && + s->lookahead >= MIN_MATCH) { + s->match_length--; /* string at strstart already in table */ + do { + s->strstart++; + INSERT_STRING(s, s->strstart, hash_head); + /* strstart never exceeds WSIZE-MAX_MATCH, so there are + * always MIN_MATCH bytes ahead. + */ + } while (--s->match_length != 0); + s->strstart++; + } else +#endif + { + s->strstart += s->match_length; + s->match_length = 0; + s->ins_h = s->window[s->strstart]; + UPDATE_HASH(s, s->ins_h, s->window[s->strstart + 1]); +#if MIN_MATCH != 3 + Call UPDATE_HASH() MIN_MATCH-3 more times +#endif + /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not + * matter since it will be recomputed at next deflate call. + */ + } + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit(s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; +} + +#ifndef FASTEST +/* =========================================================================== + * Same as above, but achieves better compression. We use a lazy + * evaluation for matches: a match is finally adopted only if there is + * no better match at the next window position. + */ +local block_state deflate_slow(deflate_state *s, int flush) { + IPos hash_head; /* head of hash chain */ + int bflush; /* set if current block must be flushed */ + + /* Process the input block. */ + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the next match, plus MIN_MATCH bytes to insert the + * string following the next match. + */ + if (s->lookahead < MIN_LOOKAHEAD) { + fill_window(s); + if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* Insert the string window[strstart .. 
strstart + 2] in the + * dictionary, and set hash_head to the head of the hash chain: + */ + hash_head = NIL; + if (s->lookahead >= MIN_MATCH) { + INSERT_STRING(s, s->strstart, hash_head); + } + + /* Find the longest match, discarding those <= prev_length. + */ + s->prev_length = s->match_length, s->prev_match = s->match_start; + s->match_length = MIN_MATCH-1; + + if (hash_head != NIL && s->prev_length < s->max_lazy_match && + s->strstart - hash_head <= MAX_DIST(s)) { + /* To simplify the code, we prevent matches with the string + * of window index 0 (in particular we have to avoid a match + * of the string with itself at the start of the input file). + */ + s->match_length = longest_match (s, hash_head); + /* longest_match() sets match_start */ + + if (s->match_length <= 5 && (s->strategy == Z_FILTERED +#if TOO_FAR <= 32767 + || (s->match_length == MIN_MATCH && + s->strstart - s->match_start > TOO_FAR) +#endif + )) { + + /* If prev_match is also MIN_MATCH, match_start is garbage + * but we will ignore the current match anyway. + */ + s->match_length = MIN_MATCH-1; + } + } + /* If there was a match at the previous step and the current + * match is not better, output the previous match: + */ + if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { + uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; + /* Do not insert strings in hash table beyond this. */ + + check_match(s, s->strstart - 1, s->prev_match, s->prev_length); + + _tr_tally_dist(s, s->strstart - 1 - s->prev_match, + s->prev_length - MIN_MATCH, bflush); + + /* Insert in hash table all strings up to the end of the match. + * strstart - 1 and strstart are already inserted. If there is not + * enough lookahead, the last two strings are not inserted in + * the hash table. + */ + s->lookahead -= s->prev_length - 1; + s->prev_length -= 2; + do { + if (++s->strstart <= max_insert) { + INSERT_STRING(s, s->strstart, hash_head); + } + } while (--s->prev_length != 0); + s->match_available = 0; + s->match_length = MIN_MATCH-1; + s->strstart++; + + if (bflush) FLUSH_BLOCK(s, 0); + + } else if (s->match_available) { + /* If there was no match at the previous position, output a + * single literal. If there was a match but the current match + * is longer, truncate the previous match to a single literal. + */ + Tracevv((stderr,"%c", s->window[s->strstart - 1])); + _tr_tally_lit(s, s->window[s->strstart - 1], bflush); + if (bflush) { + FLUSH_BLOCK_ONLY(s, 0); + } + s->strstart++; + s->lookahead--; + if (s->strm->avail_out == 0) return need_more; + } else { + /* There is no previous match to compare with, wait for + * the next step to decide. + */ + s->match_available = 1; + s->strstart++; + s->lookahead--; + } + } + Assert (flush != Z_NO_FLUSH, "no flush?"); + if (s->match_available) { + Tracevv((stderr,"%c", s->window[s->strstart - 1])); + _tr_tally_lit(s, s->window[s->strstart - 1], bflush); + s->match_available = 0; + } + s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; +} +#endif /* FASTEST */ + +/* =========================================================================== + * For Z_RLE, simply look for runs of bytes, generate matches only of distance + * one. Do not maintain a hash table. (It will be regenerated if this run of + * deflate switches away from Z_RLE.) 
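+ * For example, a run of five 'a' bytes is emitted as the literal 'a'
+ * followed by a match of length four at distance one, which inflate
+ * expands by repeatedly copying the byte it just wrote.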
+ */ +local block_state deflate_rle(deflate_state *s, int flush) { + int bflush; /* set if current block must be flushed */ + uInt prev; /* byte at distance one to match */ + Bytef *scan, *strend; /* scan goes up to strend for length of run */ + + for (;;) { + /* Make sure that we always have enough lookahead, except + * at the end of the input file. We need MAX_MATCH bytes + * for the longest run, plus one for the unrolled loop. + */ + if (s->lookahead <= MAX_MATCH) { + fill_window(s); + if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { + return need_more; + } + if (s->lookahead == 0) break; /* flush the current block */ + } + + /* See how many times the previous byte repeats */ + s->match_length = 0; + if (s->lookahead >= MIN_MATCH && s->strstart > 0) { + scan = s->window + s->strstart - 1; + prev = *scan; + if (prev == *++scan && prev == *++scan && prev == *++scan) { + strend = s->window + s->strstart + MAX_MATCH; + do { + } while (prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + prev == *++scan && prev == *++scan && + scan < strend); + s->match_length = MAX_MATCH - (uInt)(strend - scan); + if (s->match_length > s->lookahead) + s->match_length = s->lookahead; + } + Assert(scan <= s->window + (uInt)(s->window_size - 1), + "wild scan"); + } + + /* Emit match if have run of MIN_MATCH or longer, else emit literal */ + if (s->match_length >= MIN_MATCH) { + check_match(s, s->strstart, s->strstart - 1, s->match_length); + + _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); + + s->lookahead -= s->match_length; + s->strstart += s->match_length; + s->match_length = 0; + } else { + /* No match, output a literal byte */ + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit(s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + } + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; +} + +/* =========================================================================== + * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. + * (It will be regenerated if this run of deflate switches away from Huffman.) + */ +local block_state deflate_huff(deflate_state *s, int flush) { + int bflush; /* set if current block must be flushed */ + + for (;;) { + /* Make sure that we have a literal to write. */ + if (s->lookahead == 0) { + fill_window(s); + if (s->lookahead == 0) { + if (flush == Z_NO_FLUSH) + return need_more; + break; /* flush the current block */ + } + } + + /* Output a literal byte */ + s->match_length = 0; + Tracevv((stderr,"%c", s->window[s->strstart])); + _tr_tally_lit(s, s->window[s->strstart], bflush); + s->lookahead--; + s->strstart++; + if (bflush) FLUSH_BLOCK(s, 0); + } + s->insert = 0; + if (flush == Z_FINISH) { + FLUSH_BLOCK(s, 1); + return finish_done; + } + if (s->sym_next) + FLUSH_BLOCK(s, 0); + return block_done; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/deflate.h b/c-blosc/internal-complibs/zlib-1.3.1/deflate.h new file mode 100644 index 0000000..300c6ad --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/deflate.h @@ -0,0 +1,377 @@ +/* deflate.h -- internal compression state + * Copyright (C) 1995-2024 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. 
It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id$ */ + +#ifndef DEFLATE_H +#define DEFLATE_H + +#include "zutil.h" + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer creation by deflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip encoding + should be left enabled. */ +#ifndef NO_GZIP +# define GZIP +#endif + +/* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at + the cost of a larger memory footprint */ +/* #define LIT_MEM */ + +/* =========================================================================== + * Internal compression state. + */ + +#define LENGTH_CODES 29 +/* number of length codes, not counting the special END_BLOCK code */ + +#define LITERALS 256 +/* number of literal bytes 0..255 */ + +#define L_CODES (LITERALS+1+LENGTH_CODES) +/* number of Literal or Length codes, including the END_BLOCK code */ + +#define D_CODES 30 +/* number of distance codes */ + +#define BL_CODES 19 +/* number of codes used to transfer the bit lengths */ + +#define HEAP_SIZE (2*L_CODES+1) +/* maximum heap size */ + +#define MAX_BITS 15 +/* All codes must not exceed MAX_BITS bits */ + +#define Buf_size 16 +/* size of bit buffer in bi_buf */ + +#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ +#ifdef GZIP +# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ +#endif +#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ +#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ +#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ +#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ +#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ +#define FINISH_STATE 666 /* stream complete */ +/* Stream status */ + + +/* Data structure describing a single value and its code string. */ +typedef struct ct_data_s { + union { + ush freq; /* frequency count */ + ush code; /* bit string */ + } fc; + union { + ush dad; /* father node in Huffman tree */ + ush len; /* length of bit string */ + } dl; +} FAR ct_data; + +#define Freq fc.freq +#define Code fc.code +#define Dad dl.dad +#define Len dl.len + +typedef struct static_tree_desc_s static_tree_desc; + +typedef struct tree_desc_s { + ct_data *dyn_tree; /* the dynamic tree */ + int max_code; /* largest code with non zero frequency */ + const static_tree_desc *stat_desc; /* the corresponding static tree */ +} FAR tree_desc; + +typedef ush Pos; +typedef Pos FAR Posf; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. 
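+ * (With the default 32K window, the 16-bit Pos entries keep the prev[]
+ * array below down to 64 KiB.)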
+ */ + +typedef struct internal_state { + z_streamp strm; /* pointer back to this zlib stream */ + int status; /* as the name implies */ + Bytef *pending_buf; /* output still pending */ + ulg pending_buf_size; /* size of pending_buf */ + Bytef *pending_out; /* next pending byte to output to the stream */ + ulg pending; /* nb of bytes in the pending buffer */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ + gz_headerp gzhead; /* gzip header information to write */ + ulg gzindex; /* where in extra, name, or comment */ + Byte method; /* can only be DEFLATED */ + int last_flush; /* value of flush param for previous deflate call */ + + /* used by deflate.c: */ + + uInt w_size; /* LZ77 window size (32K by default) */ + uInt w_bits; /* log2(w_size) (8..16) */ + uInt w_mask; /* w_size - 1 */ + + Bytef *window; + /* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least wSize + * bytes. With this organization, matches are limited to a distance of + * wSize-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: use the user input buffer as sliding window. + */ + + ulg window_size; + /* Actual size of window: 2*wSize, except when the user input buffer + * is directly used as sliding window. + */ + + Posf *prev; + /* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + + Posf *head; /* Heads of the hash chains or NIL. */ + + uInt ins_h; /* hash index of string to be inserted */ + uInt hash_size; /* number of elements in hash table */ + uInt hash_bits; /* log2(hash_size) */ + uInt hash_mask; /* hash_size-1 */ + + uInt hash_shift; + /* Number of bits by which ins_h must be shifted at each input + * step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * hash_shift * MIN_MATCH >= hash_bits + */ + + long block_start; + /* Window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + + uInt match_length; /* length of best match */ + IPos prev_match; /* previous match */ + int match_available; /* set if previous match exists */ + uInt strstart; /* start of string to insert */ + uInt match_start; /* start of matching string */ + uInt lookahead; /* number of valid bytes ahead in window */ + + uInt prev_length; + /* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + + uInt max_chain_length; + /* To speed up deflation, hash chains are never searched beyond this + * length. A higher limit improves compression ratio but degrades the + * speed. + */ + + uInt max_lazy_match; + /* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ +# define max_insert_length max_lazy_match + /* Insert new strings in the hash table only if the match length is not + * greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. 
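+     * (In the default configuration table, levels 1..3 run deflate_fast(),
+     * which consults this field; levels 4..9 run deflate_slow(), which uses
+     * max_lazy_match instead.)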
+ */ + + int level; /* compression level (1..9) */ + int strategy; /* favor or force Huffman coding*/ + + uInt good_match; + /* Use a faster search when the previous match is longer than this */ + + int nice_match; /* Stop searching when current match exceeds this */ + + /* used by trees.c: */ + /* Didn't use ct_data typedef below to suppress compiler warning */ + struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ + struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ + struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ + + struct tree_desc_s l_desc; /* desc. for literal tree */ + struct tree_desc_s d_desc; /* desc. for distance tree */ + struct tree_desc_s bl_desc; /* desc. for bit length tree */ + + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ + int heap_len; /* number of elements in the heap */ + int heap_max; /* element of largest frequency */ + /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. + * The same heap array is used to build all trees. + */ + + uch depth[2*L_CODES+1]; + /* Depth of each subtree used as tie breaker for trees of equal frequency + */ + +#ifdef LIT_MEM +# define LIT_BUFS 5 + ushf *d_buf; /* buffer for distances */ + uchf *l_buf; /* buffer for literals/lengths */ +#else +# define LIT_BUFS 4 + uchf *sym_buf; /* buffer for distances and literals/lengths */ +#endif + + uInt lit_bufsize; + /* Size of match buffer for literals/lengths. There are 4 reasons for + * limiting lit_bufsize to 64K: + * - frequencies can be kept in 16 bit counters + * - if compression is not successful for the first block, all input + * data is still in the window so we can still emit a stored block even + * when input comes from standard input. (This can also be done for + * all blocks if lit_bufsize is not greater than 32K.) + * - if compression is not successful for a file smaller than 64K, we can + * even emit a stored file instead of a stored block (saving 5 bytes). + * This is applicable only for zip (not gzip or zlib). + * - creating new Huffman trees less frequently may not provide fast + * adaptation to changes in the input data statistics. (Take for + * example a binary file with poorly compressible code followed by + * a highly compressible string table.) Smaller buffer sizes give + * fast adaptation but have of course the overhead of transmitting + * trees more frequently. + * - I can't count above 4 + */ + + uInt sym_next; /* running index in symbol buffer */ + uInt sym_end; /* symbol table full when sym_next reaches this */ + + ulg opt_len; /* bit length of current block with optimal trees */ + ulg static_len; /* bit length of current block with static trees */ + uInt matches; /* number of string matches in current block */ + uInt insert; /* bytes at end of window left to insert */ + +#ifdef ZLIB_DEBUG + ulg compressed_len; /* total bit length of compressed file mod 2^32 */ + ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ +#endif + + ush bi_buf; + /* Output buffer. bits are inserted starting at the bottom (least + * significant bits). + */ + int bi_valid; + /* Number of valid bits in bi_buf. All bits above the last valid bit + * are always zero. + */ + + ulg high_water; + /* High water mark offset in window for initialized bytes -- bytes above + * this are set to zero in order to avoid memory check warnings when + * longest match routines access bytes past the input. 
This is then + * updated to the new high water mark. + */ + +} FAR deflate_state; + +/* Output a byte on the stream. + * IN assertion: there is enough room in pending_buf. + */ +#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} + + +#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) +/* Minimum amount of lookahead, except at the end of the input file. + * See deflate.c for comments about the MIN_MATCH+1. + */ + +#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) +/* In order to simplify the code, particularly on 16 bit machines, match + * distances are limited to MAX_DIST instead of WSIZE. + */ + +#define WIN_INIT MAX_MATCH +/* Number of bytes after end of data in window to initialize in order to avoid + memory checker errors from longest match routines */ + + /* in trees.c */ +void ZLIB_INTERNAL _tr_init(deflate_state *s); +int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc); +void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, + ulg stored_len, int last); +void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s); +void ZLIB_INTERNAL _tr_align(deflate_state *s); +void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, + ulg stored_len, int last); + +#define d_code(dist) \ + ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) +/* Mapping from a distance to a distance code. dist is the distance - 1 and + * must not have side effects. _dist_code[256] and _dist_code[257] are never + * used. + */ + +#ifndef ZLIB_DEBUG +/* Inline versions of _tr_tally for speed: */ + +#if defined(GEN_TREES_H) || !defined(STDC) + extern uch ZLIB_INTERNAL _length_code[]; + extern uch ZLIB_INTERNAL _dist_code[]; +#else + extern const uch ZLIB_INTERNAL _length_code[]; + extern const uch ZLIB_INTERNAL _dist_code[]; +#endif + +#ifdef LIT_MEM +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->d_buf[s->sym_next] = 0; \ + s->l_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->d_buf[s->sym_next] = dist; \ + s->l_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +#else +# define _tr_tally_lit(s, c, flush) \ + { uch cc = (c); \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = 0; \ + s->sym_buf[s->sym_next++] = cc; \ + s->dyn_ltree[cc].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +# define _tr_tally_dist(s, distance, length, flush) \ + { uch len = (uch)(length); \ + ush dist = (ush)(distance); \ + s->sym_buf[s->sym_next++] = (uch)dist; \ + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \ + s->sym_buf[s->sym_next++] = len; \ + dist--; \ + s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ + s->dyn_dtree[d_code(dist)].Freq++; \ + flush = (s->sym_next == s->sym_end); \ + } +#endif +#else +# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) +# define _tr_tally_dist(s, distance, length, flush) \ + flush = _tr_tally(s, distance, length) +#endif + +#endif /* DEFLATE_H */ diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/enough.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/enough.c new file mode 100644 index 0000000..8a3cade --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/enough.c @@ -0,0 +1,597 @@ +/* enough.c -- determine the maximum size of inflate's Huffman code tables over + * all 
possible valid and complete prefix codes, subject to a length limit. + * Copyright (C) 2007, 2008, 2012, 2018 Mark Adler + * Version 1.5 5 August 2018 Mark Adler + */ + +/* Version history: + 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4) + 1.1 4 Jan 2007 Use faster incremental table usage computation + Prune examine() search on previously visited states + 1.2 5 Jan 2007 Comments clean up + As inflate does, decrease root for short codes + Refuse cases where inflate would increase root + 1.3 17 Feb 2008 Add argument for initial root table size + Fix bug for initial root table size == max - 1 + Use a macro to compute the history index + 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) + Clean up comparisons of different types + Clean up code indentation + 1.5 5 Aug 2018 Clean up code style, formatting, and comments + Show all the codes for the maximum, and only the maximum + */ + +/* + Examine all possible prefix codes for a given number of symbols and a + maximum code length in bits to determine the maximum table size for zlib's + inflate. Only complete prefix codes are counted. + + Two codes are considered distinct if the vectors of the number of codes per + length are not identical. So permutations of the symbol assignments result + in the same code for the counting, as do permutations of the assignments of + the bit values to the codes (i.e. only canonical codes are counted). + + We build a code from shorter to longer lengths, determining how many symbols + are coded at each length. At each step, we have how many symbols remain to + be coded, what the last code length used was, and how many bit patterns of + that length remain unused. Then we add one to the code length and double the + number of unused patterns to graduate to the next code length. We then + assign all portions of the remaining symbols to that code length that + preserve the properties of a correct and eventually complete code. Those + properties are: we cannot use more bit patterns than are available; and when + all the symbols are used, there are exactly zero possible bit patterns left + unused. + + The inflate Huffman decoding algorithm uses two-level lookup tables for + speed. There is a single first-level table to decode codes up to root bits + in length (root == 9 for literal/length codes and root == 6 for distance + codes, in the current inflate implementation). The base table has 1 << root + entries and is indexed by the next root bits of input. Codes shorter than + root bits have replicated table entries, so that the correct entry is + pointed to regardless of the bits that follow the short code. If the code is + longer than root bits, then the table entry points to a second-level table. + The size of that table is determined by the longest code with that root-bit + prefix. If that longest code has length len, then the table has size 1 << + (len - root), to index the remaining bits in that set of codes. Each + subsequent root-bit prefix then has its own sub-table. The total number of + table entries required by the code is calculated incrementally as the number + of codes at each bit length is populated. When all of the codes are shorter + than root bits, then root is reduced to the longest code length, resulting + in a single, smaller, one-level table. + + The inflate algorithm also provides for small values of root (relative to + the log2 of the number of symbols), where the shortest code has more bits + than root. 
In that case, root is increased to the length of the shortest + code. This program, by design, does not handle that case, so it is verified + that the number of symbols is less than 1 << (root + 1). + + In order to speed up the examination (by about ten orders of magnitude for + the default arguments), the intermediate states in the build-up of a code + are remembered and previously visited branches are pruned. The memory + required for this will increase rapidly with the total number of symbols and + the maximum code length in bits. However this is a very small price to pay + for the vast speedup. + + First, all of the possible prefix codes are counted, and reachable + intermediate states are noted by a non-zero count in a saved-results array. + Second, the intermediate states that lead to (root + 1) bit or longer codes + are used to look at all sub-codes from those junctures for their inflate + memory usage. (The amount of memory used is not affected by the number of + codes of root bits or less in length.) Third, the visited states in the + construction of those sub-codes and the associated calculation of the table + size is recalled in order to avoid recalculating from the same juncture. + Beginning the code examination at (root + 1) bit codes, which is enabled by + identifying the reachable nodes, accounts for about six of the orders of + magnitude of improvement for the default arguments. About another four + orders of magnitude come from not revisiting previous states. Out of + approximately 2x10^16 possible prefix codes, only about 2x10^6 sub-codes + need to be examined to cover all of the possible table memory usage cases + for the default arguments of 286 symbols limited to 15-bit codes. + + Note that the uintmax_t type is used for counting. It is quite easy to + exceed the capacity of an eight-byte integer with a large number of symbols + and a large maximum code length, so multiple-precision arithmetic would need + to replace the integer arithmetic in that case. This program will abort if + an overflow occurs. The big_t type identifies where the counting takes + place. + + The uintmax_t type is also used for calculating the number of possible codes + remaining at the maximum length. This limits the maximum code length to the + number of bits in a long long minus the number of bits needed to represent + the symbols in a flat code. The code_t type identifies where the bit-pattern + counting takes place. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <stdint.h> +#include <assert.h> + +#define local static + +// Special data types. +typedef uintmax_t big_t; // type for code counting +#define PRIbig "ju" // printf format for big_t +typedef uintmax_t code_t; // type for bit pattern counting +struct tab { // type for been-here check + size_t len; // allocated length of bit vector in octets + char *vec; // allocated bit vector +}; + +/* The array for saving results, num[], is indexed with this triplet: + + syms: number of symbols remaining to code + left: number of available bit patterns at length len + len: number of bits in the codes currently being assigned + + Those indices are constrained thusly when saving results: + + syms: 3..totsym (totsym == total symbols to code) + left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) + len: 1..max - 1 (max == maximum code length in bits) + + syms == 2 is not saved since that immediately leads to a single code.
left + must be even, since it represents the number of available bit patterns at + the current length, which is double the number at the previous length. left + ends at syms-1 since left == syms immediately results in a single code. + (left > sym is not allowed since that would result in an incomplete code.) + len is less than max, since the code completes immediately when len == max. + + The offset into the array is calculated for the three indices with the first + one (syms) being outermost, and the last one (len) being innermost. We build + the array with length max-1 lists for the len index, with syms-3 of those + for each symbol. There are totsym-2 of those, with each one varying in + length as a function of sym. See the calculation of index in map() for the + index, and the calculation of size in main() for the size of the array. + + For the deflate example of 286 symbols limited to 15-bit codes, the array + has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than half + of the space allocated for saved results is actually used -- not all + possible triplets are reached in the generation of valid prefix codes. + */ + +/* The array for tracking visited states, done[], is itself indexed identically + to the num[] array as described above for the (syms, left, len) triplet. + Each element in the array is further indexed by the (mem, rem) doublet, + where mem is the amount of inflate table space used so far, and rem is the + remaining unused entries in the current inflate sub-table. Each indexed + element is simply one bit indicating whether the state has been visited or + not. Since the ranges for mem and rem are not known a priori, each bit + vector is of a variable size, and grows as needed to accommodate the visited + states. mem and rem are used to calculate a single index in a triangular + array. Since the range of mem is expected in the default case to be about + ten times larger than the range of rem, the array is skewed to reduce the + memory usage, with eight times the range for mem than for rem. See the + calculations for offset and bit in been_here() for the details. + + For the deflate example of 286 symbols limited to 15-bit codes, the bit + vectors grow to total 5.5 MB, in addition to the 4.3 MB done array itself. + */ + +// Type for a variable-length, allocated string. +typedef struct { + char *str; // pointer to allocated string + size_t size; // size of allocation + size_t len; // length of string, not including terminating zero +} string_t; + +// Clear a string_t. +local void string_clear(string_t *s) { + s->str[0] = 0; + s->len = 0; +} + +// Initialize a string_t. +local void string_init(string_t *s) { + s->size = 16; + s->str = malloc(s->size); + assert(s->str != NULL && "out of memory"); + string_clear(s); +} + +// Release the allocation of a string_t. +local void string_free(string_t *s) { + free(s->str); + s->str = NULL; + s->size = 0; + s->len = 0; +} + +// Save the results of printf with fmt and the subsequent argument list to s. +// Each call appends to s. The allocated space for s is increased as needed. +local void string_printf(string_t *s, char *fmt, ...) 
{ + va_list ap; + va_start(ap, fmt); + size_t len = s->len; + int ret = vsnprintf(s->str + len, s->size - len, fmt, ap); + assert(ret >= 0 && "out of memory"); + s->len += ret; + if (s->size < s->len + 1) { + do { + s->size <<= 1; + assert(s->size != 0 && "overflow"); + } while (s->size < s->len + 1); + s->str = realloc(s->str, s->size); + assert(s->str != NULL && "out of memory"); + vsnprintf(s->str + len, s->size - len, fmt, ap); + } + va_end(ap); +} + +// Globals to avoid propagating constants or constant pointers recursively. +struct { + int max; // maximum allowed bit length for the codes + int root; // size of base code table in bits + int large; // largest code table so far + size_t size; // number of elements in num and done + big_t tot; // total number of codes with maximum tables size + string_t out; // display of subcodes for maximum tables size + int *code; // number of symbols assigned to each bit length + big_t *num; // saved results array for code counting + struct tab *done; // states already evaluated array +} g; + +// Index function for num[] and done[]. +local inline size_t map(int syms, int left, int len) { + return ((size_t)((syms - 1) >> 1) * ((syms - 2) >> 1) + + (left >> 1) - 1) * (g.max - 1) + + len - 1; +} + +// Free allocated space in globals. +local void cleanup(void) { + if (g.done != NULL) { + for (size_t n = 0; n < g.size; n++) + if (g.done[n].len) + free(g.done[n].vec); + g.size = 0; + free(g.done); g.done = NULL; + } + free(g.num); g.num = NULL; + free(g.code); g.code = NULL; + string_free(&g.out); +} + +// Return the number of possible prefix codes using bit patterns of lengths len +// through max inclusive, coding syms symbols, with left bit patterns of length +// len unused -- return -1 if there is an overflow in the counting. Keep a +// record of previous results in num to prevent repeating the same calculation. +local big_t count(int syms, int left, int len) { + // see if only one possible code + if (syms == left) + return 1; + + // note and verify the expected state + assert(syms > left && left > 0 && len < g.max); + + // see if we've done this one already + size_t index = map(syms, left, len); + big_t got = g.num[index]; + if (got) + return got; // we have -- return the saved result + + // we need to use at least this many bit patterns so that the code won't be + // incomplete at the next length (more bit patterns than symbols) + int least = (left << 1) - syms; + if (least < 0) + least = 0; + + // we can use at most this many bit patterns, lest there not be enough + // available for the remaining symbols at the maximum length (if there were + // no limit to the code length, this would become: most = left - 1) + int most = (((code_t)left << (g.max - len)) - syms) / + (((code_t)1 << (g.max - len)) - 1); + + // count all possible codes from this juncture and add them up + big_t sum = 0; + for (int use = least; use <= most; use++) { + got = count(syms - use, (left - use) << 1, len + 1); + sum += got; + if (got == (big_t)-1 || sum < got) // overflow + return (big_t)-1; + } + + // verify that all recursive calls are productive + assert(sum != 0); + + // save the result and return it + g.num[index] = sum; + return sum; +} + +// Return true if we've been here before, set to true if not. Set a bit in a +// bit vector to indicate visiting this state. Each (syms,len,left) state has a +// variable size bit vector indexed by (mem,rem). The bit vector is lengthened +// as needed to allow setting the (mem,rem) bit. 
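+// For instance, with root == 9 the state (mem == 520, rem == 2) reduces to
+// mem == 4 and rem == 1 after the subtraction and shifts, giving offset
+// ((4 >> 3) + 1) == 1, then ((1 * 2) >> 1) + 1 == 2, and bit 1 << (4 & 7) ==
+// 0x10 -- that is, bit 4 of vec[2].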
+local int been_here(int syms, int left, int len, int mem, int rem) { + // point to vector for (syms,left,len), bit in vector for (mem,rem) + size_t index = map(syms, left, len); + mem -= 1 << g.root; // mem always includes the root table + mem >>= 1; // mem and rem are always even + rem >>= 1; + size_t offset = (mem >> 3) + rem; + offset = ((offset * (offset + 1)) >> 1) + rem; + int bit = 1 << (mem & 7); + + // see if we've been here + size_t length = g.done[index].len; + if (offset < length && (g.done[index].vec[offset] & bit) != 0) + return 1; // done this! + + // we haven't been here before -- set the bit to show we have now + + // see if we need to lengthen the vector in order to set the bit + if (length <= offset) { + // if we have one already, enlarge it, zero out the appended space + char *vector; + if (length) { + do { + length <<= 1; + } while (length <= offset); + vector = realloc(g.done[index].vec, length); + assert(vector != NULL && "out of memory"); + memset(vector + g.done[index].len, 0, length - g.done[index].len); + } + + // otherwise we need to make a new vector and zero it out + else { + length = 16; + while (length <= offset) + length <<= 1; + vector = calloc(length, 1); + assert(vector != NULL && "out of memory"); + } + + // install the new vector + g.done[index].len = length; + g.done[index].vec = vector; + } + + // set the bit + g.done[index].vec[offset] |= bit; + return 0; +} + +// Examine all possible codes from the given node (syms, len, left). Compute +// the amount of memory required to build inflate's decoding tables, where the +// number of code structures used so far is mem, and the number remaining in +// the current sub-table is rem. +local void examine(int syms, int left, int len, int mem, int rem) { + // see if we have a complete code + if (syms == left) { + // set the last code entry + g.code[len] = left; + + // complete computation of memory used by this code + while (rem < left) { + left -= rem; + rem = 1 << (len - g.root); + mem += rem; + } + assert(rem == left); + + // if this is at the maximum, show the sub-code + if (mem >= g.large) { + // if this is a new maximum, update the maximum and clear out the + // printed sub-codes from the previous maximum + if (mem > g.large) { + g.large = mem; + string_clear(&g.out); + } + + // compute the starting state for this sub-code + syms = 0; + left = 1 << g.max; + for (int bits = g.max; bits > g.root; bits--) { + syms += g.code[bits]; + left -= g.code[bits]; + assert((left & 1) == 0); + left >>= 1; + } + + // print the starting state and the resulting sub-code to g.out + string_printf(&g.out, "<%u, %u, %u>:", + syms, g.root + 1, ((1 << g.root) - left) << 1); + for (int bits = g.root + 1; bits <= g.max; bits++) + if (g.code[bits]) + string_printf(&g.out, " %d[%d]", g.code[bits], bits); + string_printf(&g.out, "\n"); + } + + // remove entries as we drop back down in the recursion + g.code[len] = 0; + return; + } + + // prune the tree if we can + if (been_here(syms, left, len, mem, rem)) + return; + + // we need to use at least this many bit patterns so that the code won't be + // incomplete at the next length (more bit patterns than symbols) + int least = (left << 1) - syms; + if (least < 0) + least = 0; + + // we can use at most this many bit patterns, lest there not be enough + // available for the remaining symbols at the maximum length (if there were + // no limit to the code length, this would become: most = left - 1) + int most = (((code_t)left << (g.max - len)) - syms) / + (((code_t)1 << (g.max - len)) - 
1); + + // occupy least table spaces, creating new sub-tables as needed + int use = least; + while (rem < use) { + use -= rem; + rem = 1 << (len - g.root); + mem += rem; + } + rem -= use; + + // examine codes from here, updating table space as we go + for (use = least; use <= most; use++) { + g.code[len] = use; + examine(syms - use, (left - use) << 1, len + 1, + mem + (rem ? 1 << (len - g.root) : 0), rem << 1); + if (rem == 0) { + rem = 1 << (len - g.root); + mem += rem; + } + rem--; + } + + // remove entries as we drop back down in the recursion + g.code[len] = 0; +} + +// Look at all sub-codes starting with root + 1 bits. Look at only the valid +// intermediate code states (syms, left, len). For each completed code, +// calculate the amount of memory required by inflate to build the decoding +// tables. Find the maximum amount of memory required and show the codes that +// require that maximum. +local void enough(int syms) { + // clear code + for (int n = 0; n <= g.max; n++) + g.code[n] = 0; + + // look at all (root + 1) bit and longer codes + string_clear(&g.out); // empty saved results + g.large = 1 << g.root; // base table + if (g.root < g.max) // otherwise, there's only a base table + for (int n = 3; n <= syms; n++) + for (int left = 2; left < n; left += 2) { + // look at all reachable (root + 1) bit nodes, and the + // resulting codes (complete at root + 2 or more) + size_t index = map(n, left, g.root + 1); + if (g.root + 1 < g.max && g.num[index]) // reachable node + examine(n, left, g.root + 1, 1 << g.root, 0); + + // also look at root bit codes with completions at root + 1 + // bits (not saved in num, since complete), just in case + if (g.num[index - 1] && n <= left << 1) + examine((n - left) << 1, (n - left) << 1, g.root + 1, + 1 << g.root, 0); + } + + // done + printf("maximum of %d table entries for root = %d\n", g.large, g.root); + fputs(g.out.str, stdout); +} + +// Examine and show the total number of possible prefix codes for a given +// maximum number of symbols, initial root table size, and maximum code length +// in bits -- those are the command arguments in that order. The default values +// are 286, 9, and 15 respectively, for the deflate literal/length code. The +// possible codes are counted for each number of coded symbols from two to the +// maximum. The counts for each of those and the total number of codes are +// shown. The maximum number of inflate table entries is then calculated across +// all possible codes. Each new maximum number of table entries and the +// associated sub-code (starting at root + 1 == 10 bits) is shown. +// +// To count and examine prefix codes that are not length-limited, provide a +// maximum length equal to the number of symbols minus one. +// +// For the deflate literal/length code, use "enough". For the deflate distance +// code, use "enough 30 6". 
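+// For the defaults this reports a maximum of 852 table entries, and 592 for
+// the distance code -- the values recorded as ENOUGH_LENS and ENOUGH_DISTS in
+// zlib's inftrees.h.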
+int main(int argc, char **argv) { + // set up globals for cleanup() + g.code = NULL; + g.num = NULL; + g.done = NULL; + string_init(&g.out); + + // get arguments -- default to the deflate literal/length code + int syms = 286; + g.root = 9; + g.max = 15; + if (argc > 1) { + syms = atoi(argv[1]); + if (argc > 2) { + g.root = atoi(argv[2]); + if (argc > 3) + g.max = atoi(argv[3]); + } + } + if (argc > 4 || syms < 2 || g.root < 1 || g.max < 1) { + fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", + stderr); + return 1; + } + + // if not restricting the code length, the longest is syms - 1 + if (g.max > syms - 1) + g.max = syms - 1; + + // determine the number of bits in a code_t + int bits = 0; + for (code_t word = 1; word; word <<= 1) + bits++; + + // make sure that the calculation of most will not overflow + if (g.max > bits || (code_t)(syms - 2) >= ((code_t)-1 >> (g.max - 1))) { + fputs("abort: code length too long for internal types\n", stderr); + return 1; + } + + // reject impossible code requests + if ((code_t)(syms - 1) > ((code_t)1 << g.max) - 1) { + fprintf(stderr, "%d symbols cannot be coded in %d bits\n", + syms, g.max); + return 1; + } + + // allocate code vector + g.code = calloc(g.max + 1, sizeof(int)); + assert(g.code != NULL && "out of memory"); + + // determine size of saved results array, checking for overflows, + // allocate and clear the array (set all to zero with calloc()) + if (syms == 2) // iff max == 1 + g.num = NULL; // won't be saving any results + else { + g.size = syms >> 1; + int n = (syms - 1) >> 1; + assert(g.size <= (size_t)-1 / n && "overflow"); + g.size *= n; + n = g.max - 1; + assert(g.size <= (size_t)-1 / n && "overflow"); + g.size *= n; + g.num = calloc(g.size, sizeof(big_t)); + assert(g.num != NULL && "out of memory"); + } + + // count possible codes for all numbers of symbols, add up counts + big_t sum = 0; + for (int n = 2; n <= syms; n++) { + big_t got = count(n, 2, 1); + sum += got; + assert(got != (big_t)-1 && sum >= got && "overflow"); + } + printf("%"PRIbig" total codes for 2 to %d symbols", sum, syms); + if (g.max < syms - 1) + printf(" (%d-bit length limit)\n", g.max); + else + puts(" (no length limit)"); + + // allocate and clear done array for been_here() + if (syms == 2) + g.done = NULL; + else { + g.done = calloc(g.size, sizeof(struct tab)); + assert(g.done != NULL && "out of memory"); + } + + // find and show maximum inflate table usage + if (g.root > g.max) // reduce root to max length + g.root = g.max; + if ((code_t)syms < ((code_t)1 << (g.root + 1))) + enough(syms); + else + fputs("cannot handle minimum code lengths > root", stderr); + + // done + cleanup(); + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/fitblk.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/fitblk.c new file mode 100644 index 0000000..723dc00 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/fitblk.c @@ -0,0 +1,233 @@ +/* fitblk.c: example of fitting compressed output to a specified size + Not copyrighted -- provided to the public domain + Version 1.1 25 November 2004 Mark Adler */ + +/* Version history: + 1.0 24 Nov 2004 First version + 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() + Use fixed-size, stack-allocated raw buffers + Simplify code moving compression to subroutines + Use assert() for internal errors + Add detailed description of approach + */ + +/* Approach to just fitting a requested compressed size: + + fitblk performs three compression passes on a portion of the input + data in 
order to determine how much of that input will compress to
+   nearly the requested output block size. The first pass generates
+   enough deflate blocks to produce output to fill the requested
+   output size plus a specified excess amount (see the EXCESS define
+   below). The last deflate block may go quite a bit past that, but
+   is discarded. The second pass decompresses and recompresses just
+   the compressed data that fit in the requested plus excess sized
+   buffer. The deflate process is terminated after that amount of
+   input, which is less than the amount consumed on the first pass.
+   The last deflate block of the result will be of a comparable size
+   to the final product, so that the header for that deflate block and
+   the compression ratio for that block will be about the same as in
+   the final product. The third compression pass decompresses the
+   result of the second step, but only the compressed data up to the
+   requested size minus an amount to allow the compressed stream to
+   complete (see the MARGIN define below). That will result in a
+   final compressed stream whose length is less than or equal to the
+   requested size. Assuming sufficient input and a requested size
+   greater than a few hundred bytes, the shortfall will typically be
+   less than ten bytes.
+
+   If the input is short enough that the first compression completes
+   before filling the requested output size, then that compressed
+   stream is returned with no recompression.
+
+   EXCESS is chosen to be just greater than the shortfall seen in a
+   two pass approach similar to the above. That shortfall is due to
+   the last deflate block compressing more efficiently with a smaller
+   header on the second pass. EXCESS is set to be large enough so
+   that there is enough uncompressed data for the second pass to fill
+   out the requested size, and small enough so that the final deflate
+   block of the second pass will be close in size to the final deflate
+   block of the third and final pass. MARGIN is chosen to be just
+   large enough to assure that the final compression has enough room
+   to complete in all cases.
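+
+   As a rough sketch of the flow in terms of the routines defined
+   below (buffer names as in main()):
+
+     pass 1:  partcompress(stdin, &def)   deflate into blk[], with
+                                          size + EXCESS bytes of room
+     pass 2:  recompress(&inf, &def)      blk[] -> tmp[], again with
+                                          size + EXCESS bytes of room
+     pass 3:  recompress(&inf, &def)      first size - MARGIN bytes of
+                                          tmp[] -> blk[], at most size
+                                          bytes of output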
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "zlib.h"
+
+#define local static
+
+/* print nastygram and leave */
+local void quit(char *why)
+{
+    fprintf(stderr, "fitblk abort: %s\n", why);
+    exit(1);
+}
+
+#define RAWLEN 4096    /* intermediate uncompressed buffer size */
+
+/* compress from file to def until provided buffer is full or end of
+   input reached; return last deflate() return value, or Z_ERRNO if
+   there was a read error on the file */
+local int partcompress(FILE *in, z_streamp def)
+{
+    int ret, flush;
+    unsigned char raw[RAWLEN];
+
+    flush = Z_NO_FLUSH;
+    do {
+        def->avail_in = fread(raw, 1, RAWLEN, in);
+        if (ferror(in))
+            return Z_ERRNO;
+        def->next_in = raw;
+        if (feof(in))
+            flush = Z_FINISH;
+        ret = deflate(def, flush);
+        assert(ret != Z_STREAM_ERROR);
+    } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
+    return ret;
+}
+
+/* recompress from inf's input to def's output; the input for inf and
+   the output for def are set in those structures before calling;
+   return last deflate() return value, or Z_MEM_ERROR if inflate()
+   was not able to allocate enough memory when it needed to */
+local int recompress(z_streamp inf, z_streamp def)
+{
+    int ret, flush;
+    unsigned char raw[RAWLEN];
+
+    flush = Z_NO_FLUSH;
+    do {
+        /* decompress */
+        inf->avail_out = RAWLEN;
+        inf->next_out = raw;
+        ret = inflate(inf, Z_NO_FLUSH);
+        assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
+               ret != Z_NEED_DICT);
+        if (ret == Z_MEM_ERROR)
+            return ret;
+
+        /* compress what was decompressed until done or no room */
+        def->avail_in = RAWLEN - inf->avail_out;
+        def->next_in = raw;
+        if (inf->avail_out != 0)
+            flush = Z_FINISH;
+        ret = deflate(def, flush);
+        assert(ret != Z_STREAM_ERROR);
+    } while (ret != Z_STREAM_END && def->avail_out != 0);
+    return ret;
+}
+
+#define EXCESS 256    /* empirically determined stream overage */
+#define MARGIN 8      /* amount to back off for completion */
+
+/* compress from stdin to fixed-size block on stdout */
+int main(int argc, char **argv)
+{
+    int ret;                /* return code */
+    unsigned size;          /* requested fixed output block size */
+    unsigned have;          /* bytes written by deflate() call */
+    unsigned char *blk;     /* intermediate and final stream */
+    unsigned char *tmp;     /* close to desired size stream */
+    z_stream def, inf;      /* zlib deflate and inflate states */
+
+    /* get requested output size */
+    if (argc != 2)
+        quit("need one argument: size of output block");
+    ret = strtol(argv[1], argv + 1, 10);
+    if (argv[1][0] != 0)
+        quit("argument must be a number");
+    if (ret < 8)            /* 8 is minimum zlib stream size */
+        quit("need positive size of 8 or greater");
+    size = (unsigned)ret;
+
+    /* allocate memory for buffers and compression engine */
+    blk = malloc(size + EXCESS);
+    def.zalloc = Z_NULL;
+    def.zfree = Z_NULL;
+    def.opaque = Z_NULL;
+    ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
+    if (ret != Z_OK || blk == NULL)
+        quit("out of memory");
+
+    /* compress from stdin until output full, or no more input */
+    def.avail_out = size + EXCESS;
+    def.next_out = blk;
+    ret = partcompress(stdin, &def);
+    if (ret == Z_ERRNO)
+        quit("error reading input");
+
+    /* if it all fit, then size was undersubscribed -- done!
*/ + if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { + /* write block to stdout */ + have = size + EXCESS - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (all input)\n", + size - have, size); + return 0; + } + + /* it didn't all fit -- set up for recompression */ + inf.zalloc = Z_NULL; + inf.zfree = Z_NULL; + inf.opaque = Z_NULL; + inf.avail_in = 0; + inf.next_in = Z_NULL; + ret = inflateInit(&inf); + tmp = malloc(size + EXCESS); + if (ret != Z_OK || tmp == NULL) + quit("out of memory"); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do first recompression close to the right amount */ + inf.avail_in = size + EXCESS; + inf.next_in = blk; + def.avail_out = size + EXCESS; + def.next_out = tmp; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + + /* set up for next recompression */ + ret = inflateReset(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateReset(&def); + assert(ret != Z_STREAM_ERROR); + + /* do second and final recompression (third compression) */ + inf.avail_in = size - MARGIN; /* assure stream will complete */ + inf.next_in = tmp; + def.avail_out = size; + def.next_out = blk; + ret = recompress(&inf, &def); + if (ret == Z_MEM_ERROR) + quit("out of memory"); + assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ + + /* done -- write block to stdout */ + have = size - def.avail_out; + if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) + quit("error writing output"); + + /* clean up and print results to stderr */ + free(tmp); + ret = inflateEnd(&inf); + assert(ret != Z_STREAM_ERROR); + ret = deflateEnd(&def); + assert(ret != Z_STREAM_ERROR); + free(blk); + fprintf(stderr, + "%u bytes unused out of %u requested (%lu input)\n", + size - have, size, def.total_in); + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/gun.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/gun.c new file mode 100644 index 0000000..bea5497 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/gun.c @@ -0,0 +1,702 @@ +/* gun.c -- simple gunzip to give an example of the use of inflateBack() + * Copyright (C) 2003, 2005, 2008, 2010, 2012 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + Version 1.7 12 August 2012 Mark Adler */ + +/* Version history: + 1.0 16 Feb 2003 First version for testing of inflateBack() + 1.1 21 Feb 2005 Decompress concatenated gzip streams + Remove use of "this" variable (C++ keyword) + Fix return value for in() + Improve allocation failure checking + Add typecasting for void * structures + Add -h option for command version and usage + Add a bunch of comments + 1.2 20 Mar 2005 Add Unix compress (LZW) decompression + Copy file attributes from input file to output file + 1.3 12 Jun 2005 Add casts for error messages [Oberhumer] + 1.4 8 Dec 2006 LZW decompression speed improvements + 1.5 9 Feb 2008 Avoid warning in latest version of gcc + 1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings + 1.7 12 Aug 2012 Update for z_const usage in zlib 1.2.8 + */ + +/* + gun [ -t ] [ name ... ] + + decompresses the data in the named gzip files. If no arguments are given, + gun will decompress from stdin to stdout. The names must end in .gz, -gz, + .z, -z, _z, or .Z. 
The uncompressed data will be written to a file name
+   with the suffix stripped. On success, the original file is deleted. On
+   failure, the output file is deleted. For most failures, the command will
+   continue to process the remaining names on the command line. A memory
+   allocation failure will abort the command. If -t is specified, then the
+   listed files or stdin will be tested as gzip files for integrity (without
+   checking for a proper suffix), no output will be written, and no files
+   will be deleted.
+
+   Like gzip, gun allows concatenated gzip streams and will decompress them,
+   writing all of the uncompressed data to the output. Unlike gzip, gun allows
+   an empty file on input, and will produce no error writing an empty output
+   file.
+
+   gun will also decompress files made by Unix compress, which uses LZW
+   compression. These files are automatically detected by virtue of their
+   magic header bytes. Since the end of a Unix compress stream is marked by
+   the end-of-file, they cannot be concatenated. If a Unix compress stream is
+   encountered in an input file, it is the last stream in that file.
+
+   Like gunzip and uncompress, the file attributes of the original compressed
+   file are maintained in the final uncompressed file, to the extent that the
+   user permissions allow it.
+
+   On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
+   1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
+   LZW decompression provided by gun is about twice as fast as the standard
+   Unix uncompress command.
+ */
+
+/* external functions and related types and constants */
+#include <stdio.h>          /* fprintf() */
+#include <stdlib.h>         /* malloc(), free() */
+#include <string.h>         /* strerror(), strcmp(), strlen(), memcpy() */
+#include <errno.h>          /* errno */
+#include <fcntl.h>          /* open() */
+#include <unistd.h>         /* read(), write(), close(), chown(), unlink() */
+#include <sys/types.h>
+#include <sys/stat.h>       /* stat(), chmod() */
+#include <utime.h>          /* utime() */
+#include "zlib.h"           /* inflateBackInit(), inflateBack(), */
+                            /* inflateBackEnd(), crc32() */
+
+/* function declaration */
+#define local static
+
+/* buffer constants */
+#define SIZE 32768U         /* input and output buffer sizes */
+#define PIECE 16384         /* limits i/o chunks for 16-bit int case */
+
+/* structure for infback() to pass to input function in() -- it maintains the
+   input file and a buffer of size SIZE */
+struct ind {
+    int infile;
+    unsigned char *inbuf;
+};
+
+/* Load input buffer, assumed to be empty, and return bytes loaded and a
+   pointer to them. read() is called until the buffer is full, or until it
+   returns end-of-file or error. Return 0 on error. */
+local unsigned in(void *in_desc, z_const unsigned char **buf)
+{
+    int ret;
+    unsigned len;
+    unsigned char *next;
+    struct ind *me = (struct ind *)in_desc;
+
+    next = me->inbuf;
+    *buf = next;
+    len = 0;
+    do {
+        ret = PIECE;
+        if ((unsigned)ret > SIZE - len)
+            ret = (int)(SIZE - len);
+        ret = (int)read(me->infile, next, ret);
+        if (ret == -1) {
+            len = 0;
+            break;
+        }
+        next += ret;
+        len += ret;
+    } while (ret != 0 && len < SIZE);
+    return len;
+}
+
+/* structure for infback() to pass to output function out() -- it maintains the
+   output file, a running CRC-32 check on the output and the total number of
+   bytes output, both for checking against the gzip trailer. (The length in
+   the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and
+   the output is greater than 4 GB.)
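+
+   As a worked example of that modulo arithmetic: five gigabytes of output,
+   5 * 2^30 bytes, would be recorded in the trailer as 5 * 2^30 mod 2^32 =
+   2^30, i.e. one gigabyte, and the four trailer bytes compared in gunpipe()
+   below would still match.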
*/ +struct outd { + int outfile; + int check; /* true if checking crc and total */ + unsigned long crc; + unsigned long total; +}; + +/* Write output buffer and update the CRC-32 and total bytes written. write() + is called until all of the output is written or an error is encountered. + On success out() returns 0. For a write failure, out() returns 1. If the + output file descriptor is -1, then nothing is written. + */ +local int out(void *out_desc, unsigned char *buf, unsigned len) +{ + int ret; + struct outd *me = (struct outd *)out_desc; + + if (me->check) { + me->crc = crc32(me->crc, buf, len); + me->total += len; + } + if (me->outfile != -1) + do { + ret = PIECE; + if ((unsigned)ret > len) + ret = (int)len; + ret = (int)write(me->outfile, buf, ret); + if (ret == -1) + return 1; + buf += ret; + len -= ret; + } while (len != 0); + return 0; +} + +/* next input byte macro for use inside lunpipe() and gunpipe() */ +#define NEXT() (have ? 0 : (have = in(indp, &next)), \ + last = have ? (have--, (int)(*next++)) : -1) + +/* memory for gunpipe() and lunpipe() -- + the first 256 entries of prefix[] and suffix[] are never used, could + have offset the index, but it's faster to waste the memory */ +unsigned char inbuf[SIZE]; /* input buffer */ +unsigned char outbuf[SIZE]; /* output buffer */ +unsigned short prefix[65536]; /* index to LZW prefix string */ +unsigned char suffix[65536]; /* one-character LZW suffix */ +unsigned char match[65280 + 2]; /* buffer for reversed match or gzip + 32K sliding window */ + +/* throw out what's left in the current bits byte buffer (this is a vestigial + aspect of the compressed data format derived from an implementation that + made use of a special VAX machine instruction!) */ +#define FLUSHCODE() \ + do { \ + left = 0; \ + rem = 0; \ + if (chunk > have) { \ + chunk -= have; \ + have = 0; \ + if (NEXT() == -1) \ + break; \ + chunk--; \ + if (chunk > have) { \ + chunk = have = 0; \ + break; \ + } \ + } \ + have -= chunk; \ + next += chunk; \ + chunk = 0; \ + } while (0) + +/* Decompress a compress (LZW) file from indp to outfile. The compress magic + header (two bytes) has already been read and verified. There are have bytes + of buffered input at next. strm is used for passing error information back + to gunpipe(). + + lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of + file, read error, or write error (a write error indicated by strm->next_in + not equal to Z_NULL), or Z_DATA_ERROR for invalid input. 
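+
+   (For reference: a compress stream begins with the magic bytes 0x1f 0x9d --
+   the 31 and 157 checked for in gunpipe() below -- followed by the flags byte
+   decoded here, whose low five bits give the maximum code size in bits,
+   9..16, and whose 0x80 bit selects block compress mode, enabling the clear
+   code 256.)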
+ */ +local int lunpipe(unsigned have, z_const unsigned char *next, struct ind *indp, + int outfile, z_stream *strm) +{ + int last; /* last byte read by NEXT(), or -1 if EOF */ + unsigned chunk; /* bytes left in current chunk */ + int left; /* bits left in rem */ + unsigned rem; /* unused bits from input */ + int bits; /* current bits per code */ + unsigned code; /* code, table traversal index */ + unsigned mask; /* mask for current bits codes */ + int max; /* maximum bits per code for this stream */ + unsigned flags; /* compress flags, then block compress flag */ + unsigned end; /* last valid entry in prefix/suffix tables */ + unsigned temp; /* current code */ + unsigned prev; /* previous code */ + unsigned final; /* last character written for previous code */ + unsigned stack; /* next position for reversed string */ + unsigned outcnt; /* bytes in output buffer */ + struct outd outd; /* output structure */ + unsigned char *p; + + /* set up output */ + outd.outfile = outfile; + outd.check = 0; + + /* process remainder of compress header -- a flags byte */ + flags = NEXT(); + if (last == -1) + return Z_BUF_ERROR; + if (flags & 0x60) { + strm->msg = (char *)"unknown lzw flags set"; + return Z_DATA_ERROR; + } + max = flags & 0x1f; + if (max < 9 || max > 16) { + strm->msg = (char *)"lzw bits out of range"; + return Z_DATA_ERROR; + } + if (max == 9) /* 9 doesn't really mean 9 */ + max = 10; + flags &= 0x80; /* true if block compress */ + + /* clear table */ + bits = 9; + mask = 0x1ff; + end = flags ? 256 : 255; + + /* set up: get first 9-bit code, which is the first decompressed byte, but + don't create a table entry until the next code */ + if (NEXT() == -1) /* no compressed data is ok */ + return Z_OK; + final = prev = (unsigned)last; /* low 8 bits of code */ + if (NEXT() == -1) /* missing a bit */ + return Z_BUF_ERROR; + if (last & 1) { /* code must be < 256 */ + strm->msg = (char *)"invalid lzw code"; + return Z_DATA_ERROR; + } + rem = (unsigned)last >> 1; /* remaining 7 bits */ + left = 7; + chunk = bits - 2; /* 7 bytes left in this chunk */ + outbuf[0] = (unsigned char)final; /* write first decompressed byte */ + outcnt = 1; + + /* decode codes */ + stack = 0; + for (;;) { + /* if the table will be full after this, increment the code size */ + if (end >= mask && bits < max) { + FLUSHCODE(); + bits++; + mask <<= 1; + mask++; + } + + /* get a code of length bits */ + if (chunk == 0) /* decrement chunk modulo bits */ + chunk = bits; + code = rem; /* low bits of code */ + if (NEXT() == -1) { /* EOF is end of compressed data */ + /* write remaining buffered output */ + if (outcnt && out(&outd, outbuf, outcnt)) { + strm->next_in = outbuf; /* signal write error */ + return Z_BUF_ERROR; + } + return Z_OK; + } + code += (unsigned)last << left; /* middle (or high) bits of code */ + left += 8; + chunk--; + if (bits > left) { /* need more bits */ + if (NEXT() == -1) /* can't end in middle of code */ + return Z_BUF_ERROR; + code += (unsigned)last << left; /* high bits of code */ + left += 8; + chunk--; + } + code &= mask; /* mask to current code length */ + left -= bits; /* number of unused bits */ + rem = (unsigned)last >> (8 - left); /* unused bits from last byte */ + + /* process clear code (256) */ + if (code == 256 && flags) { + FLUSHCODE(); + bits = 9; /* initialize bits and mask */ + mask = 0x1ff; + end = 255; /* empty table */ + continue; /* get next code */ + } + + /* special code to reuse last match */ + temp = code; /* save the current code */ + if (code > end) { + /* Be picky on the 
allowed code here, and make sure that the code
+               we drop through (prev) will be a valid index so that random
+               input does not cause an exception. The code != end + 1 check is
+               empirically derived, and not checked in the original uncompress
+               code. If this ever causes a problem, that check could be safely
+               removed. Leaving this check in greatly improves gun's ability
+               to detect random or corrupted input after a compress header.
+               In any case, the prev > end check must be retained. */
+            if (code != end + 1 || prev > end) {
+                strm->msg = (char *)"invalid lzw code";
+                return Z_DATA_ERROR;
+            }
+            match[stack++] = (unsigned char)final;
+            code = prev;
+        }
+
+        /* walk through linked list to generate output in reverse order */
+        p = match + stack;
+        while (code >= 256) {
+            *p++ = suffix[code];
+            code = prefix[code];
+        }
+        stack = p - match;
+        match[stack++] = (unsigned char)code;
+        final = code;
+
+        /* link new table entry */
+        if (end < mask) {
+            end++;
+            prefix[end] = (unsigned short)prev;
+            suffix[end] = (unsigned char)final;
+        }
+
+        /* set previous code for next iteration */
+        prev = temp;
+
+        /* write output in forward order */
+        while (stack > SIZE - outcnt) {
+            while (outcnt < SIZE)
+                outbuf[outcnt++] = match[--stack];
+            if (out(&outd, outbuf, outcnt)) {
+                strm->next_in = outbuf;     /* signal write error */
+                return Z_BUF_ERROR;
+            }
+            outcnt = 0;
+        }
+        p = match + stack;
+        do {
+            outbuf[outcnt++] = *--p;
+        } while (p > match);
+        stack = 0;
+
+        /* loop for next code with final and prev as the last match, rem and
+           left provide the first 0..7 bits of the next code, end is the last
+           valid table entry */
+    }
+}
+
+/* Decompress a gzip file from infile to outfile. strm is assumed to have been
+   successfully initialized with inflateBackInit(). The input file may consist
+   of a series of gzip streams, in which case all of them will be decompressed
+   to the output file. If outfile is -1, then the gzip stream(s) integrity is
+   checked and nothing is written.
+
+   The return value is a zlib error code: Z_MEM_ERROR if out of memory,
+   Z_DATA_ERROR if the header or the compressed data is invalid, or if the
+   trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
+   prematurely or a write error occurs, or Z_ERRNO if junk (not another gzip
+   stream) follows a valid gzip stream.
+ */
+local int gunpipe(z_stream *strm, int infile, int outfile)
+{
+    int ret, first, last;
+    unsigned have, flags, len;
+    z_const unsigned char *next = NULL;
+    struct ind ind, *indp;
+    struct outd outd;
+
+    /* setup input buffer */
+    ind.infile = infile;
+    ind.inbuf = inbuf;
+    indp = &ind;
+
+    /* decompress concatenated gzip streams */
+    have = 0;                   /* no input data read in yet */
+    first = 1;                  /* looking for first gzip header */
+    strm->next_in = Z_NULL;     /* so Z_BUF_ERROR means EOF */
+    for (;;) {
+        /* look for the two magic header bytes for a gzip stream */
+        if (NEXT() == -1) {
+            ret = Z_OK;
+            break;              /* empty gzip stream is ok */
+        }
+        if (last != 31 || (NEXT() != 139 && last != 157)) {
+            strm->msg = (char *)"incorrect header check";
+            ret = first ? Z_DATA_ERROR : Z_ERRNO;
+            break;              /* not a gzip or compress header */
+        }
+        first = 0;              /* next non-header is junk */
+
+        /* process a compress (LZW) file -- can't be concatenated after this */
+        if (last == 157) {
+            ret = lunpipe(have, next, indp, outfile, strm);
+            break;
+        }
+
+        /* process remainder of gzip header */
+        ret = Z_BUF_ERROR;
+        if (NEXT() != 8) {      /* only deflate method allowed */
+            if (last == -1) break;
+            strm->msg = (char *)"unknown compression method";
+            ret = Z_DATA_ERROR;
+            break;
+        }
+        flags = NEXT();         /* header flags */
+        NEXT();                 /* discard mod time, xflgs, os */
+        NEXT();
+        NEXT();
+        NEXT();
+        NEXT();
+        NEXT();
+        if (last == -1) break;
+        if (flags & 0xe0) {
+            strm->msg = (char *)"unknown header flags set";
+            ret = Z_DATA_ERROR;
+            break;
+        }
+        if (flags & 4) {        /* extra field */
+            len = NEXT();
+            len += (unsigned)(NEXT()) << 8;
+            if (last == -1) break;
+            while (len > have) {
+                len -= have;
+                have = 0;
+                if (NEXT() == -1) break;
+                len--;
+            }
+            if (last == -1) break;
+            have -= len;
+            next += len;
+        }
+        if (flags & 8)          /* file name */
+            while (NEXT() != 0 && last != -1)
+                ;
+        if (flags & 16)         /* comment */
+            while (NEXT() != 0 && last != -1)
+                ;
+        if (flags & 2) {        /* header crc */
+            NEXT();
+            NEXT();
+        }
+        if (last == -1) break;
+
+        /* set up output */
+        outd.outfile = outfile;
+        outd.check = 1;
+        outd.crc = crc32(0L, Z_NULL, 0);
+        outd.total = 0;
+
+        /* decompress data to output */
+        strm->next_in = next;
+        strm->avail_in = have;
+        ret = inflateBack(strm, in, indp, out, &outd);
+        if (ret != Z_STREAM_END) break;
+        next = strm->next_in;
+        have = strm->avail_in;
+        strm->next_in = Z_NULL;     /* so Z_BUF_ERROR means EOF */
+
+        /* check trailer */
+        ret = Z_BUF_ERROR;
+        if (NEXT() != (int)(outd.crc & 0xff) ||
+            NEXT() != (int)((outd.crc >> 8) & 0xff) ||
+            NEXT() != (int)((outd.crc >> 16) & 0xff) ||
+            NEXT() != (int)((outd.crc >> 24) & 0xff)) {
+            /* crc error */
+            if (last != -1) {
+                strm->msg = (char *)"incorrect data check";
+                ret = Z_DATA_ERROR;
+            }
+            break;
+        }
+        if (NEXT() != (int)(outd.total & 0xff) ||
+            NEXT() != (int)((outd.total >> 8) & 0xff) ||
+            NEXT() != (int)((outd.total >> 16) & 0xff) ||
+            NEXT() != (int)((outd.total >> 24) & 0xff)) {
+            /* length error */
+            if (last != -1) {
+                strm->msg = (char *)"incorrect length check";
+                ret = Z_DATA_ERROR;
+            }
+            break;
+        }
+
+        /* go back and look for another gzip stream */
+    }
+
+    /* clean up and return */
+    return ret;
+}
+
+/* Copy file attributes, from -> to, as best we can. This is best effort, so
+   no errors are reported. The mode bits, including suid, sgid, and the sticky
+   bit are copied (if allowed), the owner's user id and group id are copied
+   (again if allowed), and the access and modify times are copied. */
+local void copymeta(char *from, char *to)
+{
+    struct stat was;
+    struct utimbuf when;
+
+    /* get all of from's Unix meta data, return if not a regular file */
+    if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
+        return;
+
+    /* set to's mode bits, ignore errors */
+    (void)chmod(to, was.st_mode & 07777);
+
+    /* copy owner's user and group, ignore errors */
+    (void)chown(to, was.st_uid, was.st_gid);
+
+    /* copy access and modify times, ignore errors */
+    when.actime = was.st_atime;
+    when.modtime = was.st_mtime;
+    (void)utime(to, &when);
+}
+
+/* Decompress the file inname to the file outname, or if test is true, just
+   decompress without writing and check the gzip trailer for integrity. If
+   inname is NULL or an empty string, read from stdin. If outname is NULL or
+   an empty string, write to stdout.
strm is a pre-initialized inflateBack + structure. When appropriate, copy the file attributes from inname to + outname. + + gunzip() returns 1 if there is an out-of-memory error or an unexpected + return code from gunpipe(). Otherwise it returns 0. + */ +local int gunzip(z_stream *strm, char *inname, char *outname, int test) +{ + int ret; + int infile, outfile; + + /* open files */ + if (inname == NULL || *inname == 0) { + inname = "-"; + infile = 0; /* stdin */ + } + else { + infile = open(inname, O_RDONLY, 0); + if (infile == -1) { + fprintf(stderr, "gun cannot open %s\n", inname); + return 0; + } + } + if (test) + outfile = -1; + else if (outname == NULL || *outname == 0) { + outname = "-"; + outfile = 1; /* stdout */ + } + else { + outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); + if (outfile == -1) { + close(infile); + fprintf(stderr, "gun cannot create %s\n", outname); + return 0; + } + } + errno = 0; + + /* decompress */ + ret = gunpipe(strm, infile, outfile); + if (outfile > 2) close(outfile); + if (infile > 2) close(infile); + + /* interpret result */ + switch (ret) { + case Z_OK: + case Z_ERRNO: + if (infile > 2 && outfile > 2) { + copymeta(inname, outname); /* copy attributes */ + unlink(inname); + } + if (ret == Z_ERRNO) + fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", + inname); + break; + case Z_DATA_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); + break; + case Z_MEM_ERROR: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + case Z_BUF_ERROR: + if (outfile > 2) unlink(outname); + if (strm->next_in != Z_NULL) { + fprintf(stderr, "gun write error on %s: %s\n", + outname, strerror(errno)); + } + else if (errno) { + fprintf(stderr, "gun read error on %s: %s\n", + inname, strerror(errno)); + } + else { + fprintf(stderr, "gun unexpected end of file on %s\n", + inname); + } + break; + default: + if (outfile > 2) unlink(outname); + fprintf(stderr, "gun internal error--aborting\n"); + return 1; + } + return 0; +} + +/* Process the gun command line arguments. See the command syntax near the + beginning of this source file. 
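+
+   For example (hypothetical file names):
+
+     gun foo.gz bar.Z        decompress both in place, deleting the originals
+     gun -t foo.gz           test integrity only; write and delete nothing
+     gun < foo.gz > foo      filter from stdin to stdout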
*/ +int main(int argc, char **argv) +{ + int ret, len, test; + char *outname; + unsigned char *window; + z_stream strm; + + /* initialize inflateBack state for repeated use */ + window = match; /* reuse LZW match buffer */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + ret = inflateBackInit(&strm, 15, window); + if (ret != Z_OK) { + fprintf(stderr, "gun out of memory error--aborting\n"); + return 1; + } + + /* decompress each file to the same name with the suffix removed */ + argc--; + argv++; + test = 0; + if (argc && strcmp(*argv, "-h") == 0) { + fprintf(stderr, "gun 1.6 (17 Jan 2010)\n"); + fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n"); + fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); + return 0; + } + if (argc && strcmp(*argv, "-t") == 0) { + test = 1; + argc--; + argv++; + } + if (argc) + do { + if (test) + outname = NULL; + else { + len = (int)strlen(*argv); + if (strcmp(*argv + len - 3, ".gz") == 0 || + strcmp(*argv + len - 3, "-gz") == 0) + len -= 3; + else if (strcmp(*argv + len - 2, ".z") == 0 || + strcmp(*argv + len - 2, "-z") == 0 || + strcmp(*argv + len - 2, "_z") == 0 || + strcmp(*argv + len - 2, ".Z") == 0) + len -= 2; + else { + fprintf(stderr, "gun error: no gz type on %s--skipping\n", + *argv); + continue; + } + outname = malloc(len + 1); + if (outname == NULL) { + fprintf(stderr, "gun out of memory error--aborting\n"); + ret = 1; + break; + } + memcpy(outname, *argv, len); + outname[len] = 0; + } + ret = gunzip(&strm, *argv, outname, test); + if (outname != NULL) free(outname); + if (ret) break; + } while (argv++, --argc); + else + ret = gunzip(&strm, NULL, NULL, test); + + /* clean up */ + inflateBackEnd(&strm); + return ret; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/gzappend.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzappend.c new file mode 100644 index 0000000..23e93cf --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzappend.c @@ -0,0 +1,504 @@ +/* gzappend -- command to append to a gzip file + + Copyright (C) 2003, 2012 Mark Adler, all rights reserved + version 1.2, 11 Oct 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 19 Oct 2003 - First version + * 1.1 4 Nov 2003 - Expand and clarify some comments and notes + * - Add version and copyright to help + * - Send help to stdout instead of stderr + * - Add some preemptive typecasts + * - Add L to constants in lseek() calls + * - Remove some debugging information in error messages + * - Use new data_type definition for zlib 1.2.1 + * - Simplify and unify file operations + * - Finish off gzip file in gztack() + * - Use deflatePrime() instead of adding empty blocks + * - Keep gzip file clean on appended file read errors + * - Use in-place rotate instead of auxiliary buffer + * (Why you ask? Because it was fun to write!) + * 1.2 11 Oct 2012 - Fix for proper z_const usage + * - Check for input buffer malloc failure + */ + +/* + gzappend takes a gzip file and appends to it, compressing files from the + command line or data from stdin. The gzip file is written to directly, to + avoid copying that file, in case it's large. Note that this results in the + unfriendly behavior that if gzappend fails, the gzip file is corrupted. + + This program was written to illustrate the use of the new Z_BLOCK option of + zlib 1.2.x's inflate() function. This option returns from inflate() at each + block boundary to facilitate locating and modifying the last block bit at + the start of the final deflate block. Also whether using Z_BLOCK or not, + another required feature of zlib 1.2.x is that inflate() now provides the + number of unused bits in the last input byte used. gzappend will not work + with versions of zlib earlier than 1.2.1. + + gzappend first decompresses the gzip file internally, discarding all but + the last 32K of uncompressed data, and noting the location of the last block + bit and the number of unused bits in the last byte of the compressed data. + The gzip trailer containing the CRC-32 and length of the uncompressed data + is verified. This trailer will be later overwritten. + + Then the last block bit is cleared by seeking back in the file and rewriting + the byte that contains it. Seeking forward, the last byte of the compressed + data is saved along with the number of unused bits to initialize deflate. + + A deflate process is initialized, using the last 32K of the uncompressed + data from the gzip file to initialize the dictionary. If the total + uncompressed data was less than 32K, then all of it is used to initialize + the dictionary. The deflate output bit buffer is also initialized with the + last bits from the original deflate stream. From here on, the data to + append is simply compressed using deflate, and written to the gzip file. + When that is complete, the new CRC-32 and uncompressed length are written + as the trailer of the gzip file. 
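+
+   In terms of the code below: gzscan() performs the scan just described and
+   returns a file descriptor positioned at the end of the compressed data,
+   with the deflate stream primed to continue, and gztack() then compresses
+   and appends one file (or stdin), rewriting the gzip trailer after the
+   last one.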
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "zlib.h"
+
+#define local static
+#define LGCHUNK 14
+#define CHUNK (1U << LGCHUNK)
+#define DSIZE 32768U
+
+/* print an error message and terminate with extreme prejudice */
+local void bye(char *msg1, char *msg2)
+{
+    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
+    exit(1);
+}
+
+/* return the greatest common divisor of a and b using Euclid's algorithm,
+   modified to be fast when one argument is much greater than the other, and
+   coded to avoid unnecessary swapping */
+local unsigned gcd(unsigned a, unsigned b)
+{
+    unsigned c;
+
+    while (a && b)
+        if (a > b) {
+            c = b;
+            while (a - c >= c)
+                c <<= 1;
+            a -= c;
+        }
+        else {
+            c = a;
+            while (b - c >= c)
+                c <<= 1;
+            b -= c;
+        }
+    return a + b;
+}
+
+/* rotate list[0..len-1] left by rot positions, in place */
+local void rotate(unsigned char *list, unsigned len, unsigned rot)
+{
+    unsigned char tmp;
+    unsigned cycles;
+    unsigned char *start, *last, *to, *from;
+
+    /* normalize rot and handle degenerate cases */
+    if (len < 2) return;
+    if (rot >= len) rot %= len;
+    if (rot == 0) return;
+
+    /* pointer to last entry in list */
+    last = list + (len - 1);
+
+    /* do simple left shift by one */
+    if (rot == 1) {
+        tmp = *list;
+        memmove(list, list + 1, len - 1);
+        *last = tmp;
+        return;
+    }
+
+    /* do simple right shift by one */
+    if (rot == len - 1) {
+        tmp = *last;
+        memmove(list + 1, list, len - 1);
+        *list = tmp;
+        return;
+    }
+
+    /* otherwise do rotate as a set of cycles in place */
+    cycles = gcd(len, rot);             /* number of cycles */
+    do {
+        start = from = list + cycles;   /* start index is arbitrary */
+        tmp = *from;                    /* save entry to be overwritten */
+        for (;;) {
+            to = from;                  /* next step in cycle */
+            from += rot;                /* go right rot positions */
+            if (from > last) from -= len;   /* (pointer better not wrap) */
+            if (from == start) break;   /* all but one shifted */
+            *to = *from;                /* shift left */
+        }
+        *to = tmp;                      /* complete the circle */
+    } while (--cycles);
+}
+
+/* structure for gzip file read operations */
+typedef struct {
+    int fd;                     /* file descriptor */
+    int size;                   /* 1 << size is bytes in buf */
+    unsigned left;              /* bytes available at next */
+    unsigned char *buf;         /* buffer */
+    z_const unsigned char *next;    /* next byte in buffer */
+    char *name;                 /* file name for error messages */
+} file;
+
+/* reload buffer */
+local int readin(file *in)
+{
+    int len;
+
+    len = read(in->fd, in->buf, 1 << in->size);
+    if (len == -1) bye("error reading ", in->name);
+    in->left = (unsigned)len;
+    in->next = in->buf;
+    return len;
+}
+
+/* read from file in, exit if end-of-file */
+local int readmore(file *in)
+{
+    if (readin(in) == 0) bye("unexpected end of ", in->name);
+    return 0;
+}
+
+#define read1(in) (in->left == 0 ?
readmore(in) : 0, \ + in->left--, *(in->next)++) + +/* skip over n bytes of in */ +local void skip(file *in, unsigned n) +{ + unsigned bypass; + + if (n > in->left) { + n -= in->left; + bypass = n & ~((1U << in->size) - 1); + if (bypass) { + if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) + bye("seeking ", in->name); + n -= bypass; + } + readmore(in); + if (n > in->left) + bye("unexpected end of ", in->name); + } + in->left -= n; + in->next += n; +} + +/* read a four-byte unsigned integer, little-endian, from in */ +unsigned long read4(file *in) +{ + unsigned long val; + + val = read1(in); + val += (unsigned)read1(in) << 8; + val += (unsigned long)read1(in) << 16; + val += (unsigned long)read1(in) << 24; + return val; +} + +/* skip over gzip header */ +local void gzheader(file *in) +{ + int flags; + unsigned n; + + if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); + if (read1(in) != 8) bye("unknown compression method in", in->name); + flags = read1(in); + if (flags & 0xe0) bye("unknown header flags set in", in->name); + skip(in, 6); + if (flags & 4) { + n = read1(in); + n += (unsigned)(read1(in)) << 8; + skip(in, n); + } + if (flags & 8) while (read1(in) != 0) ; + if (flags & 16) while (read1(in) != 0) ; + if (flags & 2) skip(in, 2); +} + +/* decompress gzip file "name", return strm with a deflate stream ready to + continue compression of the data in the gzip file, and return a file + descriptor pointing to where to write the compressed data -- the deflate + stream is initialized to compress using level "level" */ +local int gzscan(char *name, z_stream *strm, int level) +{ + int ret, lastbit, left, full; + unsigned have; + unsigned long crc, tot; + unsigned char *window; + off_t lastoff, end; + file gz; + + /* open gzip file */ + gz.name = name; + gz.fd = open(name, O_RDWR, 0); + if (gz.fd == -1) bye("cannot open ", name); + gz.buf = malloc(CHUNK); + if (gz.buf == NULL) bye("out of memory", ""); + gz.size = LGCHUNK; + gz.left = 0; + + /* skip gzip header */ + gzheader(&gz); + + /* prepare to decompress */ + window = malloc(DSIZE); + if (window == NULL) bye("out of memory", ""); + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = inflateInit2(strm, -15); + if (ret != Z_OK) bye("out of memory", " or library mismatch"); + + /* decompress the deflate stream, saving append information */ + lastbit = 0; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + left = 0; + strm->avail_in = gz.left; + strm->next_in = gz.next; + crc = crc32(0L, Z_NULL, 0); + have = full = 0; + do { + /* if needed, get more input */ + if (strm->avail_in == 0) { + readmore(&gz); + strm->avail_in = gz.left; + strm->next_in = gz.next; + } + + /* set up output to next available section of sliding window */ + strm->avail_out = DSIZE - have; + strm->next_out = window + have; + + /* inflate and check for errors */ + ret = inflate(strm, Z_BLOCK); + if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); + if (ret == Z_MEM_ERROR) bye("out of memory", ""); + if (ret == Z_DATA_ERROR) + bye("invalid compressed data--format violated in", name); + + /* update crc and sliding window pointer */ + crc = crc32(crc, window + have, DSIZE - have - strm->avail_out); + if (strm->avail_out) + have = DSIZE - strm->avail_out; + else { + have = 0; + full = 1; + } + + /* process end of block */ + if (strm->data_type & 128) { + if (strm->data_type & 64) + left = strm->data_type & 0x1f; + else { + lastbit = strm->data_type & 0x1f; + lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; + 
} + } + } while (ret != Z_STREAM_END); + inflateEnd(strm); + gz.left = strm->avail_in; + gz.next = strm->next_in; + + /* save the location of the end of the compressed data */ + end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; + + /* check gzip trailer and save total for deflate */ + if (crc != read4(&gz)) + bye("invalid compressed data--crc mismatch in ", name); + tot = strm->total_out; + if ((tot & 0xffffffffUL) != read4(&gz)) + bye("invalid compressed data--length mismatch in", name); + + /* if not at end of file, warn */ + if (gz.left || readin(&gz)) + fprintf(stderr, + "gzappend warning: junk at end of gzip file overwritten\n"); + + /* clear last block bit */ + lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); + lseek(gz.fd, -1L, SEEK_CUR); + if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); + + /* if window wrapped, build dictionary from window by rotating */ + if (full) { + rotate(window, DSIZE, have); + have = DSIZE; + } + + /* set up deflate stream with window, crc, total_in, and leftover bits */ + ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); + if (ret != Z_OK) bye("out of memory", ""); + deflateSetDictionary(strm, window, have); + strm->adler = crc; + strm->total_in = tot; + if (left) { + lseek(gz.fd, --end, SEEK_SET); + if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); + deflatePrime(strm, 8 - left, *gz.buf); + } + lseek(gz.fd, end, SEEK_SET); + + /* clean up and return */ + free(window); + free(gz.buf); + return gz.fd; +} + +/* append file "name" to gzip file gd using deflate stream strm -- if last + is true, then finish off the deflate stream at the end */ +local void gztack(char *name, int gd, z_stream *strm, int last) +{ + int fd, len, ret; + unsigned left; + unsigned char *in, *out; + + /* open file to compress and append */ + fd = 0; + if (name != NULL) { + fd = open(name, O_RDONLY, 0); + if (fd == -1) + fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", + name); + } + + /* allocate buffers */ + in = malloc(CHUNK); + out = malloc(CHUNK); + if (in == NULL || out == NULL) bye("out of memory", ""); + + /* compress input file and append to gzip file */ + do { + /* get more input */ + len = read(fd, in, CHUNK); + if (len == -1) { + fprintf(stderr, + "gzappend warning: error reading %s, skipping rest ...\n", + name); + len = 0; + } + strm->avail_in = (unsigned)len; + strm->next_in = in; + if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); + + /* compress and write all available output */ + do { + strm->avail_out = CHUNK; + strm->next_out = out; + ret = deflate(strm, last && len == 0 ? 
Z_FINISH : Z_NO_FLUSH); + left = CHUNK - strm->avail_out; + while (left) { + len = write(gd, out + CHUNK - strm->avail_out - left, left); + if (len == -1) bye("writing gzip file", ""); + left -= (unsigned)len; + } + } while (strm->avail_out == 0 && ret != Z_STREAM_END); + } while (len != 0); + + /* write trailer after last entry */ + if (last) { + deflateEnd(strm); + out[0] = (unsigned char)(strm->adler); + out[1] = (unsigned char)(strm->adler >> 8); + out[2] = (unsigned char)(strm->adler >> 16); + out[3] = (unsigned char)(strm->adler >> 24); + out[4] = (unsigned char)(strm->total_in); + out[5] = (unsigned char)(strm->total_in >> 8); + out[6] = (unsigned char)(strm->total_in >> 16); + out[7] = (unsigned char)(strm->total_in >> 24); + len = 8; + do { + ret = write(gd, out + 8 - len, len); + if (ret == -1) bye("writing gzip file", ""); + len -= ret; + } while (len); + close(gd); + } + + /* clean up and return */ + free(out); + free(in); + if (fd > 0) close(fd); +} + +/* process the compression level option if present, scan the gzip file, and + append the specified files, or append the data from stdin if no other file + names are provided on the command line -- the gzip file must be writable + and seekable */ +int main(int argc, char **argv) +{ + int gd, level; + z_stream strm; + + /* ignore command name */ + argc--; argv++; + + /* provide usage if no arguments */ + if (*argv == NULL) { + printf( + "gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n" + ); + printf( + "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n"); + return 0; + } + + /* set compression level */ + level = Z_DEFAULT_COMPRESSION; + if (argv[0][0] == '-') { + if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) + bye("invalid compression level", ""); + level = argv[0][1] - '0'; + if (*++argv == NULL) bye("no gzip file name after options", ""); + } + + /* prepare to append to gzip file */ + gd = gzscan(*argv++, &strm, level); + + /* append files on command line, or from stdin if none */ + if (*argv == NULL) + gztack(NULL, gd, &strm, 1); + else + do { + gztack(*argv, gd, &strm, argv[1] == NULL); + } while (*++argv != NULL); + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/gzjoin.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzjoin.c new file mode 100644 index 0000000..89e8098 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzjoin.c @@ -0,0 +1,449 @@ +/* gzjoin -- command to join gzip files into one gzip file + + Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved + version 1.2, 14 Aug 2012 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+
+  Mark Adler    madler@alumni.caltech.edu
+ */
+
+/*
+ * Change history:
+ *
+ * 1.0  11 Dec 2004     - First version
+ * 1.1  12 Jun 2005     - Changed ssize_t to long for portability
+ * 1.2  14 Aug 2012     - Clean up for z_const usage
+ */
+
+/*
+   gzjoin takes one or more gzip files on the command line and writes out a
+   single gzip file that will uncompress to the concatenation of the
+   uncompressed data from the individual gzip files. gzjoin does this without
+   having to recompress any of the data and without having to calculate a new
+   crc32 for the concatenated uncompressed data. gzjoin does however have to
+   decompress all of the input data in order to find the bits in the compressed
+   data that need to be modified to concatenate the streams.
+
+   gzjoin does not do an integrity check on the input gzip files other than
+   checking the gzip header and decompressing the compressed data. They are
+   otherwise assumed to be complete and correct.
+
+   Each joint between gzip files removes at least 18 bytes of previous trailer
+   and subsequent header, and inserts an average of about three bytes to the
+   compressed data in order to connect the streams. The output gzip file
+   has a minimal ten-byte gzip header with no file name or modification time.
+
+   This program was written to illustrate the use of the Z_BLOCK option of
+   inflate() and the crc32_combine() function. gzjoin will not compile with
+   versions of zlib earlier than 1.2.3.
+ */
+
+#include <stdio.h>      /* fputs(), fprintf(), fwrite(), putc() */
+#include <stdlib.h>     /* exit(), malloc(), free() */
+#include <fcntl.h>      /* open() */
+#include <unistd.h>     /* close(), read(), lseek() */
+#include "zlib.h"
+    /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
+
+#define local static
+
+/* exit with an error (return a value to allow use in an expression) */
+local int bail(char *why1, char *why2)
+{
+    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
+    exit(1);
+    return 0;
+}
+
+/* -- simple buffered file input with access to the buffer -- */
+
+#define CHUNK 32768         /* must be a power of two and fit in unsigned */
+
+/* bin buffered input file type */
+typedef struct {
+    char *name;             /* name of file for error messages */
+    int fd;                 /* file descriptor */
+    unsigned left;          /* bytes remaining at next */
+    unsigned char *next;    /* next byte to read */
+    unsigned char *buf;     /* allocated buffer of length CHUNK */
+} bin;
+
+/* close a buffered file and free allocated memory */
+local void bclose(bin *in)
+{
+    if (in != NULL) {
+        if (in->fd != -1)
+            close(in->fd);
+        if (in->buf != NULL)
+            free(in->buf);
+        free(in);
+    }
+}
+
+/* open a buffered file for input, return a pointer to type bin, or NULL on
+   failure */
+local bin *bopen(char *name)
+{
+    bin *in;
+
+    in = malloc(sizeof(bin));
+    if (in == NULL)
+        return NULL;
+    in->buf = malloc(CHUNK);
+    in->fd = open(name, O_RDONLY, 0);
+    if (in->buf == NULL || in->fd == -1) {
+        bclose(in);
+        return NULL;
+    }
+    in->left = 0;
+    in->next = in->buf;
+    in->name = name;
+    return in;
+}
+
+/* load buffer from file, return -1 on read error, 0 or 1 on success, with
+   1 indicating that end-of-file was reached */
+local int bload(bin *in)
+{
+    long len;
+
+    if (in == NULL)
+        return -1;
+    if (in->left != 0)
+        return 0;
+    in->next = in->buf;
+    do {
+        len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
+        if (len < 0)
+            return -1;
+        in->left += (unsigned)len;
+    } while (len != 0 && in->left < CHUNK);
+    return len == 0 ? 1 : 0;
+}
+
+/* get a byte from the file, bail if end of file */
+#define bget(in) (in->left ?
0 : bload(in), \ + in->left ? (in->left--, *(in->next)++) : \ + bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ + unsigned long val; + + val = bget(in); + val += (unsigned long)(bget(in)) << 8; + val += (unsigned long)(bget(in)) << 16; + val += (unsigned long)(bget(in)) << 24; + return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ + /* check pointer */ + if (in == NULL) + return; + + /* easy case -- skip bytes in buffer */ + if (skip <= in->left) { + in->left -= skip; + in->next += skip; + return; + } + + /* skip what's in buffer, discard buffer contents */ + skip -= in->left; + in->left = 0; + + /* seek past multiples of CHUNK bytes */ + if (skip > CHUNK) { + unsigned left; + + left = skip & (CHUNK - 1); + if (left == 0) { + /* exact number of chunks: seek all the way minus one byte to check + for end-of-file with a read */ + lseek(in->fd, skip - 1, SEEK_CUR); + if (read(in->fd, in->buf, 1) != 1) + bail("unexpected end of file on ", in->name); + return; + } + + /* skip the integral chunks, update skip with remainder */ + lseek(in->fd, skip - left, SEEK_CUR); + skip = left; + } + + /* read more input and skip remainder */ + bload(in); + if (skip > in->left) + bail("unexpected end of file on ", in->name); + in->left -= skip; + in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ + int flags; + + /* verify gzip magic header and compression method */ + if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) + bail(in->name, " is not a valid gzip file"); + + /* get and verify flags */ + flags = bget(in); + if ((flags & 0xe0) != 0) + bail("unknown reserved bits set in ", in->name); + + /* skip modification time, extra flags, and os */ + bskip(in, 6); + + /* skip extra field if present */ + if (flags & 4) { + unsigned len; + + len = bget(in); + len += (unsigned)(bget(in)) << 8; + bskip(in, len); + } + + /* skip file name if present */ + if (flags & 8) + while (bget(in) != 0) + ; + + /* skip comment if present */ + if (flags & 16) + while (bget(in) != 0) + ; + + /* skip header crc if present */ + if (flags & 2) + bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ + putc(val & 0xff, out); + putc((val >> 8) & 0xff, out); + putc((val >> 16) & 0xff, out); + putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ + if (in->left == 0) + bload(in); + if (in->left == 0) + bail("unexpected end of file on ", in->name); + strm->avail_in = in->left; + strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. */ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + *crc = crc32(0L, Z_NULL, 0); + *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last + block if clr is true, and adding empty blocks as needed to get to a byte + boundary. If clr is false, then the last block becomes the last block of + the output, and the gzip trailer is written. crc and tot maintains the + crc and length (modulo 2^32) of the output for the trailer. The resulting + gzip file is written to out. 
gzinit() must be called before the first call + of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, + FILE *out) +{ + int ret; /* return value from zlib functions */ + int pos; /* where the "last block" bit is in byte */ + int last; /* true if processing the last block */ + bin *in; /* buffered input file */ + unsigned char *start; /* start of compressed data in buffer */ + unsigned char *junk; /* buffer for uncompressed data -- discarded */ + z_off_t len; /* length of uncompressed data (support > 4 GB) */ + z_stream strm; /* zlib inflate stream */ + + /* open gzip file and skip header */ + in = bopen(name); + if (in == NULL) + bail("could not open ", name); + gzhead(in); + + /* allocate buffer for uncompressed data and initialize raw inflate + stream */ + junk = malloc(CHUNK); + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (junk == NULL || ret != Z_OK) + bail("out of memory", ""); + + /* inflate and copy compressed data, clear last-block bit if requested */ + len = 0; + zpull(&strm, in); + start = in->next; + last = start[0] & 1; + if (last && clr) + start[0] &= ~1; + strm.avail_out = 0; + for (;;) { + /* if input used and output done, write used input and get more */ + if (strm.avail_in == 0 && strm.avail_out != 0) { + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + + /* decompress -- return early when end-of-block reached */ + strm.avail_out = CHUNK; + strm.next_out = junk; + ret = inflate(&strm, Z_BLOCK); + switch (ret) { + case Z_MEM_ERROR: + bail("out of memory", ""); + case Z_DATA_ERROR: + bail("invalid compressed data in ", in->name); + } + + /* update length of uncompressed data */ + len += CHUNK - strm.avail_out; + + /* check for block boundary (only get this when block copied out) */ + if (strm.data_type & 128) { + /* if that was the last block, then done */ + if (last) + break; + + /* number of unused bits in last byte */ + pos = strm.data_type & 7; + + /* find the next last-block bit */ + if (pos != 0) { + /* next last-block bit is in last used byte */ + pos = 0x100 >> pos; + last = strm.next_in[-1] & pos; + if (last && clr) + in->buf[strm.next_in - in->buf - 1] &= ~pos; + } + else { + /* next last-block bit is in next unused byte */ + if (strm.avail_in == 0) { + /* don't have that byte yet -- get it */ + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + last = strm.next_in[0] & 1; + if (last && clr) + in->buf[strm.next_in - in->buf] &= ~1; + } + } + } + + /* update buffer with unused input */ + in->left = strm.avail_in; + in->next = in->buf + (strm.next_in - in->buf); + + /* copy used input, write empty blocks to get to byte boundary */ + pos = strm.data_type & 7; + fwrite(start, 1, in->next - start - 1, out); + last = in->next[-1]; + if (pos == 0 || !clr) + /* already at byte boundary, or last file: write last byte */ + putc(last, out); + else { + /* append empty blocks to last byte */ + last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ + if (pos & 1) { + /* odd -- append an empty stored block */ + putc(last, out); + if (pos == 1) + putc(0, out); /* two more bits in block header */ + fwrite("\0\0\xff\xff", 1, 4, out); + } + else { + /* even -- append 1, 2, or 3 empty fixed blocks */ + switch (pos) { + case 6: + putc(last | 8, out); + last = 0; + 
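+                /* fall through -- each empty fixed block is ten bits (a
+                   three-bit header with BFINAL=0 and BTYPE=01, then the
+                   seven-bit end-of-block code), so these cases emit 30, 20,
+                   or 10 bits to pad 6, 4, or 2 unused bits out to a byte
+                   boundary */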
case 4: + putc(last | 0x20, out); + last = 0; + case 2: + putc(last | 0x80, out); + putc(0, out); + } + } + } + + /* update crc and tot */ + *crc = crc32_combine(*crc, bget4(in), len); + *tot += (unsigned long)len; + + /* clean up */ + inflateEnd(&strm); + free(junk); + bclose(in); + + /* write trailer if this is the last gzip file */ + if (!clr) { + put4(*crc, out); + put4(*tot, out); + } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ + unsigned long crc, tot; /* running crc and total uncompressed length */ + + /* skip command name */ + argc--; + argv++; + + /* show usage if no arguments */ + if (argc == 0) { + fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", + stderr); + return 0; + } + + /* join gzip files on command line and write to stdout */ + gzinit(&crc, &tot, stdout); + while (argc--) + gzcopy(*argv++, argc, &crc, &tot, stdout); + + /* done */ + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.c new file mode 100644 index 0000000..da1b02e --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.c @@ -0,0 +1,1061 @@ +/* + * gzlog.c + * Copyright (C) 2004, 2008, 2012, 2016, 2019 Mark Adler, all rights reserved + * For conditions of distribution and use, see copyright notice in gzlog.h + * version 2.3, 25 May 2019 + */ + +/* + gzlog provides a mechanism for frequently appending short strings to a gzip + file that is efficient both in execution time and compression ratio. The + strategy is to write the short strings in an uncompressed form to the end of + the gzip file, only compressing when the amount of uncompressed data has + reached a given threshold. + + gzlog also provides protection against interruptions in the process due to + system crashes. The status of the operation is recorded in an extra field + in the gzip file, and is only updated once the gzip file is brought to a + valid state. The last data to be appended or compressed is saved in an + auxiliary file, so that if the operation is interrupted, it can be completed + the next time an append operation is attempted. + + gzlog maintains another auxiliary file with the last 32K of data from the + compressed portion, which is preloaded for the compression of the subsequent + data. This minimizes the impact to the compression ratio of appending. + */ + +/* + Operations Concept: + + Files (log name "foo"): + foo.gz -- gzip file with the complete log + foo.add -- last message to append or last data to compress + foo.dict -- dictionary of the last 32K of data for next compression + foo.temp -- temporary dictionary file for compression after this one + foo.lock -- lock file for reading and writing the other files + foo.repairs -- log file for log file recovery operations (not compressed) + + gzip file structure: + - fixed-length (no file name) header with extra field (see below) + - compressed data ending initially with empty stored block + - uncompressed data filling out originally empty stored block and + subsequent stored blocks as needed (16K max each) + - gzip trailer + - no junk at end (no other gzip streams) + + When appending data, the information in the first three items above plus the + foo.add file are sufficient to recover an interrupted append operation. 
The + extra field has the necessary information to restore the start of the last + stored block and determine where to append the data in the foo.add file, as + well as the crc and length of the gzip data before the append operation. + + The foo.add file is created before the gzip file is marked for append, and + deleted after the gzip file is marked as complete. So if the append + operation is interrupted, the data to add will still be there. If due to + some external force, the foo.add file gets deleted between when the append + operation was interrupted and when recovery is attempted, the gzip file will + still be restored, but without the appended data. + + When compressing data, the information in the first two items above plus the + foo.add file are sufficient to recover an interrupted compress operation. + The extra field has the necessary information to find the end of the + compressed data, and contains both the crc and length of just the compressed + data and of the complete set of data including the contents of the foo.add + file. + + Again, the foo.add file is maintained during the compress operation in case + of an interruption. If in the unlikely event the foo.add file with the data + to be compressed is missing due to some external force, a gzip file with + just the previous compressed data will be reconstructed. In this case, all + of the data that was to be compressed is lost (approximately one megabyte). + This will not occur if all that happened was an interruption of the compress + operation. + + The third state that is marked is the replacement of the old dictionary with + the new dictionary after a compress operation. Once compression is + complete, the gzip file is marked as being in the replace state. This + completes the gzip file, so an interrupt after being so marked does not + result in recompression. Then the dictionary file is replaced, and the gzip + file is marked as completed. This state prevents the possibility of + restarting compression with the wrong dictionary file. + + All three operations are wrapped by a lock/unlock procedure. In order to + gain exclusive access to the log files, first a foo.lock file must be + exclusively created. When all operations are complete, the lock is + released by deleting the foo.lock file. If when attempting to create the + lock file, it already exists and the modify time of the lock file is more + than five minutes old (set by the PATIENCE define below), then the old + lock file is considered stale and deleted, and the exclusive creation of + the lock file is retried. To assure that there are no false assessments + of the staleness of the lock file, the operations periodically touch the + lock file to update the modified date. + + Following is the definition of the extra field with all of the information + required to enable the above append and compress operations and their + recovery if interrupted. Multi-byte values are stored little endian + (consistent with the gzip format). File pointers are eight bytes long. + The crc's and lengths for the gzip trailer are four bytes long. (Note that + the length at the end of a gzip file is used for error checking only, and + for large files is actually the length modulo 2^32.) The stored block + length is two bytes long. The gzip extra field two-byte identification is + "ap" for append. It is assumed that writing the extra field to the file is + an "atomic" operation. 
That is, either all of the extra field is written + to the file, or none of it is, if the operation is interrupted right at the + point of updating the extra field. This is a reasonable assumption, since + the extra field is within the first 52 bytes of the file, which is smaller + than any expected block size for a mass storage device (usually 512 bytes or + larger). + + Extra field (35 bytes): + - Pointer to first stored block length -- this points to the two-byte length + of the first stored block, which is followed by the two-byte, one's + complement of that length. The stored block length is preceded by the + three-bit header of the stored block, which is the actual start of the + stored block in the deflate format. See the bit offset field below. + - Pointer to the last stored block length. This is the same as above, but + for the last stored block of the uncompressed data in the gzip file. + Initially this is the same as the first stored block length pointer. + When the stored block gets to 16K (see the MAX_STORE define), then a new + stored block is added, at which point the last stored block length pointer + is different from the first stored block length pointer. When they are + different, the first bit of the last stored block header is eight bits, or + one byte back from the block length. + - Compressed data crc and length. This is the crc and length of the data + that is in the compressed portion of the deflate stream. These are used + only in the event that the foo.add file containing the data to compress is + lost after a compress operation is interrupted. + - Total data crc and length. This is the crc and length of all of the data + stored in the gzip file, compressed and uncompressed. It is used to + reconstruct the gzip trailer when compressing, as well as when recovering + interrupted operations. + - Final stored block length. This is used to quickly find where to append, + and allows the restoration of the original final stored block state when + an append operation is interrupted. + - First stored block start as the number of bits back from the final stored + block first length byte. This value is in the range of 3..10, and is + stored as the low three bits of the final byte of the extra field after + subtracting three (0..7). This allows the last-block bit of the stored + block header to be updated when a new stored block is added, for the case + when the first stored block and the last stored block are the same. (When + they are different, the number of bits back is known to be eight.) This + also allows for new compressed data to be appended to the old compressed + data in the compress operation, overwriting the previous first stored + block, or for the compressed data to be terminated and a valid gzip file + reconstructed on the off chance that a compression operation was + interrupted and the data to compress in the foo.add file was deleted. + - The operation in process. This is the next two bits in the last byte (the + bits under the mask 0x18). They are interpreted as 0: nothing in process, + 1: append in process, 2: compress in process, 3: replace in process. + - The top three bits of the last byte in the extra field are reserved and + are currently set to zero. + + Main procedure: + - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of + the system open() call. If the modify time of an existing lock file is + more than PATIENCE seconds old, then the lock file is deleted and the + exclusive create is retried.
+ - Load the extra field from the foo.gz file, and see if an operation was in + progress but not completed. If so, apply the recovery procedure below. + - Perform the append procedure with the provided data. + - If the uncompressed data in the foo.gz file is 1MB or more, apply the + compress procedure. + - Delete the foo.lock file. + + Append procedure: + - Put what to append in the foo.add file so that the operation can be + restarted if this procedure is interrupted. + - Mark the foo.gz extra field with the append operation in progress. + + Restore the original last-block bit and stored block length of the last + stored block from the information in the extra field, in case a previous + append operation was interrupted. + - Append the provided data to the last stored block, creating new stored + blocks as needed and updating the stored blocks last-block bits and + lengths. + - Update the crc and length with the new data, and write the gzip trailer. + - Write over the extra field (with a single write operation) with the new + pointers, lengths, and crc's, and mark the gzip file as not in process. + Though there is still a foo.add file, it will be ignored since nothing + is in process. If a foo.add file is leftover from a previously + completed operation, it is truncated when writing new data to it. + - Delete the foo.add file. + + Compress and replace procedures: + - Read all of the uncompressed data in the stored blocks in foo.gz and write + it to foo.add. Also write foo.temp with the last 32K of that data to + provide a dictionary for the next invocation of this procedure. + - Rewrite the extra field marking foo.gz with a compression in process. + * If there is no data provided to compress (due to a missing foo.add file + when recovering), reconstruct and truncate the foo.gz file to contain + only the previous compressed data and proceed to the step after the next + one. Otherwise ... + - Compress the data with the dictionary in foo.dict, and write to the + foo.gz file starting at the bit immediately following the last previously + compressed block. If there is no foo.dict, proceed anyway with the + compression at slightly reduced efficiency. (For the foo.dict file to be + missing requires some external failure beyond simply the interruption of + a compress operation.) During this process, the foo.lock file is + periodically touched to assure that that file is not considered stale by + another process before we're done. The deflation is terminated with a + non-last empty static block (10 bits long), that is then located and + written over by a last-bit-set empty stored block. + - Append the crc and length of the data in the gzip file (previously + calculated during the append operations). + - Write over the extra field with the updated stored block offsets, bits + back, crc's, and lengths, and mark foo.gz as in process for a replacement + of the dictionary. + @ Delete the foo.add file. + - Replace foo.dict with foo.temp. + - Write over the extra field, marking foo.gz as complete. + + Recovery procedure: + - If not a replace recovery, read in the foo.add file, and provide that data + to the appropriate recovery below. If there is no foo.add file, provide + a zero data length to the recovery. In that case, the append recovery + restores the foo.gz to the previous compressed + uncompressed data state. + For the compress recovery, a missing foo.add file results in foo.gz being + restored to the previous compressed-only data state. 
+ - Append recovery: + - Pick up append at + step above + - Compress recovery: + - Pick up compress at * step above + - Replace recovery: + - Pick up compress at @ step above + - Log the repair with a date stamp in foo.repairs + */ + +#include <sys/types.h> +#include <stdio.h> /* rename, fopen, fprintf, fclose */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* strlen, strrchr, strcpy, strncpy, strcmp */ +#include <fcntl.h> /* open */ +#include <unistd.h> /* lseek, read, write, close, unlink, sleep, */ + /* ftruncate, fsync */ +#include <errno.h> /* errno */ +#include <time.h> /* time, ctime */ +#include <sys/stat.h> /* stat */ +#include <sys/time.h> /* utimes */ +#include "zlib.h" /* crc32 */ + +#include "gzlog.h" /* header for external access */ + +#define local static +typedef unsigned int uint; +typedef unsigned long ulong; + +/* Macro for debugging to deterministically force recovery operations */ +#ifdef GZLOG_DEBUG + #include <setjmp.h> /* longjmp */ + jmp_buf gzlog_jump; /* where to go back to */ + int gzlog_bail = 0; /* which point to bail at (1..8) */ + int gzlog_count = -1; /* number of times through to wait */ +# define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \ + longjmp(gzlog_jump, gzlog_bail); } while (0) +#else +# define BAIL(n) +#endif + +/* how old the lock file can be in seconds before considering it stale */ +#define PATIENCE 300 + +/* maximum stored block size in Kbytes -- must be in 1..63 */ +#define MAX_STORE 16 + +/* number of stored Kbytes to trigger compression (must be >= 32 to allow + dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to + discard the stored block headers contribution of five bytes each) */ +#define TRIGGER 1024 + +/* size of a deflate dictionary (this cannot be changed) */ +#define DICT 32768U + +/* values for the operation (2 bits) */ +#define NO_OP 0 +#define APPEND_OP 1 +#define COMPRESS_OP 2 +#define REPLACE_OP 3 + +/* macros to extract little-endian integers from an unsigned byte buffer */ +#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8)) +#define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16)) +#define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32)) + +/* macros to store integers into a byte buffer in little-endian order */ +#define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0) +#define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0) +#define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0) + +/* internal structure for log information */ +#define LOGID "\106\035\172" /* should be three non-zero characters */ +struct log { + char id[4]; /* contains LOGID to detect inadvertent overwrites */ + int fd; /* file descriptor for .gz file, opened read/write */ + char *path; /* allocated path, e.g.
"/var/log/foo" or "foo" */ + char *end; /* end of path, for appending suffices such as ".gz" */ + off_t first; /* offset of first stored block first length byte */ + int back; /* location of first block id in bits back from first */ + uint stored; /* bytes currently in last stored block */ + off_t last; /* offset of last stored block first length byte */ + ulong ccrc; /* crc of compressed data */ + ulong clen; /* length (modulo 2^32) of compressed data */ + ulong tcrc; /* crc of total data */ + ulong tlen; /* length (modulo 2^32) of total data */ + time_t lock; /* last modify time of our lock file */ +}; + +/* gzip header for gzlog */ +local unsigned char log_gzhead[] = { + 0x1f, 0x8b, /* magic gzip id */ + 8, /* compression method is deflate */ + 4, /* there is an extra field (no file name) */ + 0, 0, 0, 0, /* no modification time provided */ + 0, 0xff, /* no extra flags, no OS specified */ + 39, 0, 'a', 'p', 35, 0 /* extra field with "ap" subfield */ + /* 35 is EXTRA, 39 is EXTRA + 4 */ +}; + +#define HEAD sizeof(log_gzhead) /* should be 16 */ + +/* initial gzip extra field content (52 == HEAD + EXTRA + 1) */ +local unsigned char log_gzext[] = { + 52, 0, 0, 0, 0, 0, 0, 0, /* offset of first stored block length */ + 52, 0, 0, 0, 0, 0, 0, 0, /* offset of last stored block length */ + 0, 0, 0, 0, 0, 0, 0, 0, /* compressed data crc and length */ + 0, 0, 0, 0, 0, 0, 0, 0, /* total data crc and length */ + 0, 0, /* final stored block data length */ + 5 /* op is NO_OP, last bit 8 bits back */ +}; + +#define EXTRA sizeof(log_gzext) /* should be 35 */ + +/* initial gzip data and trailer */ +local unsigned char log_gzbody[] = { + 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ + 0, 0, 0, 0, /* crc */ + 0, 0, 0, 0 /* uncompressed length */ +}; + +#define BODY sizeof(log_gzbody) + +/* Exclusively create foo.lock in order to negotiate exclusive access to the + foo.* files. If the modify time of an existing lock file is greater than + PATIENCE seconds in the past, then consider the lock file to have been + abandoned, delete it, and try the exclusive create again. Save the lock + file modify time for verification of ownership. Return 0 on success, or -1 + on failure, usually due to an access restriction or invalid path. Note that + if stat() or unlink() fails, it may be due to another process noticing the + abandoned lock file a smidge sooner and deleting it, so those are not + flagged as an error. */ +local int log_lock(struct log *log) +{ + int fd; + struct stat st; + + strcpy(log->end, ".lock"); + while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) { + if (errno != EEXIST) + return -1; + if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) { + unlink(log->path); + continue; + } + sleep(2); /* relinquish the CPU for two seconds while waiting */ + } + close(fd); + if (stat(log->path, &st) == 0) + log->lock = st.st_mtime; + return 0; +} + +/* Update the modify time of the lock file to now, in order to prevent another + task from thinking that the lock is stale. Save the lock file modify time + for verification of ownership. */ +local void log_touch(struct log *log) +{ + struct stat st; + + strcpy(log->end, ".lock"); + utimes(log->path, NULL); + if (stat(log->path, &st) == 0) + log->lock = st.st_mtime; +} + +/* Check the log file modify time against what is expected. Return true if + this is not our lock. If it is our lock, touch it to keep it. 
*/ +local int log_check(struct log *log) +{ + struct stat st; + + strcpy(log->end, ".lock"); + if (stat(log->path, &st) || st.st_mtime != log->lock) + return 1; + log_touch(log); + return 0; +} + +/* Unlock a previously acquired lock, but only if it's ours. */ +local void log_unlock(struct log *log) +{ + if (log_check(log)) + return; + strcpy(log->end, ".lock"); + unlink(log->path); + log->lock = 0; +} + +/* Check the gzip header and read in the extra field, filling in the values in + the log structure. Return op on success or -1 if the gzip header was not as + expected. op is the current operation in progress last written to the extra + field. This assumes that the gzip file has already been opened, with the + file descriptor log->fd. */ +local int log_head(struct log *log) +{ + int op; + unsigned char buf[HEAD + EXTRA]; + + if (lseek(log->fd, 0, SEEK_SET) < 0 || + read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA || + memcmp(buf, log_gzhead, HEAD)) { + return -1; + } + log->first = PULL8(buf + HEAD); + log->last = PULL8(buf + HEAD + 8); + log->ccrc = PULL4(buf + HEAD + 16); + log->clen = PULL4(buf + HEAD + 20); + log->tcrc = PULL4(buf + HEAD + 24); + log->tlen = PULL4(buf + HEAD + 28); + log->stored = PULL2(buf + HEAD + 32); + log->back = 3 + (buf[HEAD + 34] & 7); + op = (buf[HEAD + 34] >> 3) & 3; + return op; +} + +/* Write over the extra field contents, marking the operation as op. Use fsync + to assure that the device is written to, and in the requested order. This + operation, and only this operation, is assumed to be atomic in order to + assure that the log is recoverable in the event of an interruption at any + point in the process. Return -1 if the write to foo.gz failed. */ +local int log_mark(struct log *log, int op) +{ + int ret; + unsigned char ext[EXTRA]; + + PUT8(ext, log->first); + PUT8(ext + 8, log->last); + PUT4(ext + 16, log->ccrc); + PUT4(ext + 20, log->clen); + PUT4(ext + 24, log->tcrc); + PUT4(ext + 28, log->tlen); + PUT2(ext + 32, log->stored); + ext[34] = log->back - 3 + (op << 3); + fsync(log->fd); + ret = lseek(log->fd, HEAD, SEEK_SET) < 0 || + write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0; + fsync(log->fd); + return ret; +} + +/* Rewrite the last block header bits and subsequent zero bits to get to a byte + boundary, setting the last block bit if last is true, and then write the + remainder of the stored block header (length and one's complement). Leave + the file pointer after the end of the last stored block data. Return -1 if + there is a read or write failure on the foo.gz file */ +local int log_last(struct log *log, int last) +{ + int back, len, mask; + unsigned char buf[6]; + + /* determine the locations of the bytes and bits to modify */ + back = log->last == log->first ? log->back : 8; + len = back > 8 ? 2 : 1; /* bytes back from log->last */ + mask = 0x80 >> ((back - 1) & 7); /* mask for block last-bit */ + + /* get the byte to modify (one or two back) into buf[0] -- don't need to + read the byte if the last-bit is eight bits back, since in that case + the entire byte will be modified */ + buf[0] = 0; + if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 || + read(log->fd, buf, 1) != 1)) + return -1; + + /* change the last-bit of the last stored block as requested -- note + that all bits above the last-bit are set to zero, per the type bits + of a stored block being 00 and per the convention that the bits to + bring the stream to a byte boundary are also zeros */ + buf[1] = 0; + buf[2 - len] = (*buf & (mask - 1)) + (last ? 
mask : 0); + + /* write the modified stored block header and lengths, move the file + pointer to after the last stored block data */ + PUT2(buf + 2, log->stored); + PUT2(buf + 4, log->stored ^ 0xffff); + return lseek(log->fd, log->last - len, SEEK_SET) < 0 || + write(log->fd, buf + 2 - len, len + 4) != len + 4 || + lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0; +} + +/* Append len bytes from data to the locked and open log file. len may be zero + if recovering and no .add file was found. In that case, the previous state + of the foo.gz file is restored. The data is appended uncompressed in + deflate stored blocks. Return -1 if there was an error reading or writing + the foo.gz file. */ +local int log_append(struct log *log, unsigned char *data, size_t len) +{ + uint put; + off_t end; + unsigned char buf[8]; + + /* set the last block last-bit and length, in case recovering an + interrupted append, then position the file pointer to append to the + block */ + if (log_last(log, 1)) + return -1; + + /* append, adding stored blocks and updating the offset of the last stored + block as needed, and update the total crc and length */ + while (len) { + /* append as much as we can to the last block */ + put = (MAX_STORE << 10) - log->stored; + if (put > len) + put = (uint)len; + if (put) { + if (write(log->fd, data, put) != put) + return -1; + BAIL(1); + log->tcrc = crc32(log->tcrc, data, put); + log->tlen += put; + log->stored += put; + data += put; + len -= put; + } + + /* if we need to, add a new empty stored block */ + if (len) { + /* mark current block as not last */ + if (log_last(log, 0)) + return -1; + + /* point to new, empty stored block */ + log->last += 4 + log->stored + 1; + log->stored = 0; + } + + /* mark last block as last, update its length */ + if (log_last(log, 1)) + return -1; + BAIL(2); + } + + /* write the new crc and length trailer, and truncate just in case (could + be recovering from partial append with a missing foo.add file) */ + PUT4(buf, log->tcrc); + PUT4(buf + 4, log->tlen); + if (write(log->fd, buf, 8) != 8 || + (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) + return -1; + + /* write the extra field, marking the log file as done, delete .add file */ + if (log_mark(log, NO_OP)) + return -1; + strcpy(log->end, ".add"); + unlink(log->path); /* ignore error, since may not exist */ + return 0; +} + +/* Replace the foo.dict file with the foo.temp file. Also delete the foo.add + file, since the compress operation may have been interrupted before that was + done. Returns -2 if memory could not be allocated, or -1 if reading or + writing foo.gz fails, or if the rename fails for some reason other than + foo.temp not existing. foo.temp not existing is a permitted error, since + the replace operation may have been interrupted after the rename is done, + but before foo.gz is marked as complete.
*/ +local int log_replace(struct log *log) +{ + int ret; + char *dest; + + /* delete foo.add file */ + strcpy(log->end, ".add"); + unlink(log->path); /* ignore error, since may not exist */ + BAIL(3); + + /* rename foo.temp to foo.dict, replacing foo.dict if it exists */ + strcpy(log->end, ".dict"); + dest = malloc(strlen(log->path) + 1); + if (dest == NULL) + return -2; + strcpy(dest, log->path); + strcpy(log->end, ".temp"); + ret = rename(log->path, dest); + free(dest); + if (ret && errno != ENOENT) + return -1; + BAIL(4); + + /* mark the foo.gz file as done */ + return log_mark(log, NO_OP); +} + +/* Compress the len bytes at data and append the compressed data to the + foo.gz deflate data immediately after the previous compressed data. This + overwrites the previous uncompressed data, which was stored in foo.add + and is the data provided in data[0..len-1]. If this operation is + interrupted, it picks up at the start of this routine, with the foo.add + file read in again. If there is no data to compress (len == 0), then we + simply terminate the foo.gz file after the previously compressed data, + appending a final empty stored block and the gzip trailer. Return -1 if + reading or writing the foo.gz file failed, or -2 if there was a memory + allocation failure. */ +local int log_compress(struct log *log, unsigned char *data, size_t len) +{ + int fd; + uint got, max; + ssize_t dict; + off_t end; + z_stream strm; + unsigned char buf[DICT]; + + /* compress and append compressed data */ + if (len) { + /* set up for deflate, allocating memory */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8, + Z_DEFAULT_STRATEGY) != Z_OK) + return -2; + + /* read in dictionary (last 32K of data that was compressed) */ + strcpy(log->end, ".dict"); + fd = open(log->path, O_RDONLY, 0); + if (fd >= 0) { + dict = read(fd, buf, DICT); + close(fd); + if (dict < 0) { + deflateEnd(&strm); + return -1; + } + if (dict) + deflateSetDictionary(&strm, buf, (uint)dict); + } + log_touch(log); + + /* prime deflate with last bits of previous block, position write + pointer to write those bits and overwrite what follows */ + if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1), + SEEK_SET) < 0 || + read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) { + deflateEnd(&strm); + return -1; + } + deflatePrime(&strm, (8 - log->back) & 7, *buf); + + /* compress, finishing with a partial non-last empty static block */ + strm.next_in = data; + max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */ + do { + strm.avail_in = len > max ? max : (uint)len; + len -= strm.avail_in; + do { + strm.avail_out = DICT; + strm.next_out = buf; + deflate(&strm, len ?
Z_NO_FLUSH : Z_PARTIAL_FLUSH); + got = DICT - strm.avail_out; + if (got && write(log->fd, buf, got) != got) { + deflateEnd(&strm); + return -1; + } + log_touch(log); + } while (strm.avail_out == 0); + } while (len); + deflateEnd(&strm); + BAIL(5); + + /* find start of empty static block -- scanning backwards the first one + bit is the second bit of the block, if the last byte is zero, then + we know the byte before that has a one in the top bit, since an + empty static block is ten bits long */ + if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 || + read(log->fd, buf, 1) != 1) + return -1; + log->first++; + if (*buf) { + log->back = 1; + while ((*buf & ((uint)1 << (8 - log->back++))) == 0) + ; /* guaranteed to terminate, since *buf != 0 */ + } + else + log->back = 10; + + /* update compressed crc and length */ + log->ccrc = log->tcrc; + log->clen = log->tlen; + } + else { + /* no data to compress -- fix up existing gzip stream */ + log->tcrc = log->ccrc; + log->tlen = log->clen; + } + + /* complete and truncate gzip stream */ + log->last = log->first; + log->stored = 0; + PUT4(buf, log->tcrc); + PUT4(buf + 4, log->tlen); + if (log_last(log, 1) || write(log->fd, buf, 8) != 8 || + (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) + return -1; + BAIL(6); + + /* mark as being in the replace operation */ + if (log_mark(log, REPLACE_OP)) + return -1; + + /* execute the replace operation and mark the file as done */ + return log_replace(log); +} + +/* log a repair record to the .repairs file */ +local void log_log(struct log *log, int op, char *record) +{ + time_t now; + FILE *rec; + + now = time(NULL); + strcpy(log->end, ".repairs"); + rec = fopen(log->path, "a"); + if (rec == NULL) + return; + fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ? + "append" : (op == COMPRESS_OP ? "compress" : "replace"), record); + fclose(rec); + return; +} + +/* Recover the interrupted operation op. First read foo.add for recovering an + append or compress operation. Return -1 if there was an error reading or + writing foo.gz or reading an existing foo.add, or -2 if there was a memory + allocation failure. */ +local int log_recover(struct log *log, int op) +{ + int fd, ret = 0; + unsigned char *data = NULL; + size_t len = 0; + struct stat st; + + /* log recovery */ + log_log(log, op, "start"); + + /* load foo.add file if expected and present */ + if (op == APPEND_OP || op == COMPRESS_OP) { + strcpy(log->end, ".add"); + if (stat(log->path, &st) == 0 && st.st_size) { + len = (size_t)(st.st_size); + if ((off_t)len != st.st_size || + (data = malloc(st.st_size)) == NULL) { + log_log(log, op, "allocation failure"); + return -2; + } + if ((fd = open(log->path, O_RDONLY, 0)) < 0) { + free(data); + log_log(log, op, ".add file read failure"); + return -1; + } + ret = (size_t)read(fd, data, len) != len; + close(fd); + if (ret) { + free(data); + log_log(log, op, ".add file read failure"); + return -1; + } + log_log(log, op, "loaded .add file"); + } + else + log_log(log, op, "missing .add file!"); + } + + /* recover the interrupted operation */ + switch (op) { + case APPEND_OP: + ret = log_append(log, data, len); + break; + case COMPRESS_OP: + ret = log_compress(log, data, len); + break; + case REPLACE_OP: + ret = log_replace(log); + } + + /* log status */ + log_log(log, op, ret ? "failure" : "complete"); + + /* clean up */ + if (data != NULL) + free(data); + return ret; +} + +/* Close the foo.gz file (if open) and release the lock. 
*/ +local void log_close(struct log *log) +{ + if (log->fd >= 0) + close(log->fd); + log->fd = -1; + log_unlock(log); +} + +/* Open foo.gz, verify the header, and load the extra field contents, after + first creating the foo.lock file to gain exclusive access to the foo.* + files. If foo.gz does not exist or is empty, then write the initial header, + extra, and body content of an empty foo.gz log file. If there is an error + creating the lock file due to access restrictions, or an error reading or + writing the foo.gz file, or if the foo.gz file is not a proper log file for + this object (e.g. not a gzip file or does not contain the expected extra + field), then return true. If there is an error, the lock is released. + Otherwise, the lock is left in place. */ +local int log_open(struct log *log) +{ + int op; + + /* release open file resource if left over -- can occur if lock lost + between gzlog_open() and gzlog_write() */ + if (log->fd >= 0) + close(log->fd); + log->fd = -1; + + /* negotiate exclusive access */ + if (log_lock(log) < 0) + return -1; + + /* open the log file, foo.gz */ + strcpy(log->end, ".gz"); + log->fd = open(log->path, O_RDWR | O_CREAT, 0644); + if (log->fd < 0) { + log_close(log); + return -1; + } + + /* if new, initialize foo.gz with an empty log, delete old dictionary */ + if (lseek(log->fd, 0, SEEK_END) == 0) { + if (write(log->fd, log_gzhead, HEAD) != HEAD || + write(log->fd, log_gzext, EXTRA) != EXTRA || + write(log->fd, log_gzbody, BODY) != BODY) { + log_close(log); + return -1; + } + strcpy(log->end, ".dict"); + unlink(log->path); + } + + /* verify log file and load extra field information */ + if ((op = log_head(log)) < 0) { + log_close(log); + return -1; + } + + /* check for interrupted process and if so, recover */ + if (op != NO_OP && log_recover(log, op)) { + log_close(log); + return -1; + } + + /* touch the lock file to prevent another process from grabbing it */ + log_touch(log); + return 0; +} + +/* See gzlog.h for the description of the external methods below */ +gzlog *gzlog_open(char *path) +{ + size_t n; + struct log *log; + + /* check arguments */ + if (path == NULL || *path == 0) + return NULL; + + /* allocate and initialize log structure */ + log = malloc(sizeof(struct log)); + if (log == NULL) + return NULL; + strcpy(log->id, LOGID); + log->fd = -1; + + /* save path and end of path for name construction */ + n = strlen(path); + log->path = malloc(n + 9); /* allow for ".repairs" */ + if (log->path == NULL) { + free(log); + return NULL; + } + strcpy(log->path, path); + log->end = log->path + n; + + /* gain exclusive access and verify log file -- may perform a + recovery operation if needed */ + if (log_open(log)) { + free(log->path); + free(log); + return NULL; + } + + /* return pointer to log structure */ + return log; +} + +/* gzlog_compress() return values: + 0: all good + -1: file i/o error (usually access issue) + -2: memory allocation failure + -3: invalid log pointer argument */ +int gzlog_compress(gzlog *logd) +{ + int fd, ret; + uint block; + size_t len, next; + unsigned char *data, buf[5]; + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + + /* see if we lost the lock -- if so get it again and reload the extra + field information (it probably changed), recover last operation if + necessary */ + if (log_check(log) && log_open(log)) + return -1; + + /* create space for uncompressed data */ + len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) + + log->stored; + if 
((data = malloc(len)) == NULL) + return -2; + + /* do statement here is just a cheap trick for error handling */ + do { + /* read in the uncompressed data */ + if (lseek(log->fd, log->first - 1, SEEK_SET) < 0) + break; + next = 0; + while (next < len) { + if (read(log->fd, buf, 5) != 5) + break; + block = PULL2(buf + 1); + if (next + block > len || + read(log->fd, (char *)data + next, block) != block) + break; + next += block; + } + if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored) + break; + log_touch(log); + + /* write the uncompressed data to the .add file */ + strcpy(log->end, ".add"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + break; + ret = (size_t)write(fd, data, len) != len; + if (ret | close(fd)) + break; + log_touch(log); + + /* write the dictionary for the next compress to the .temp file */ + strcpy(log->end, ".temp"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + break; + next = DICT > len ? len : DICT; + ret = (size_t)write(fd, (char *)data + len - next, next) != next; + if (ret | close(fd)) + break; + log_touch(log); + + /* roll back to compressed data, mark the compress in progress */ + log->last = log->first; + log->stored = 0; + if (log_mark(log, COMPRESS_OP)) + break; + BAIL(7); + + /* compress and append the data (clears mark) */ + ret = log_compress(log, data, len); + free(data); + return ret; + } while (0); + + /* broke out of do above on i/o error */ + free(data); + return -1; +} + +/* gzlog_write() return values: + 0: all good + -1: file i/o error (usually access issue) + -2: memory allocation failure + -3: invalid log pointer argument */ +int gzlog_write(gzlog *logd, void *data, size_t len) +{ + int fd, ret; + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + if (data == NULL || len <= 0) + return 0; + + /* see if we lost the lock -- if so get it again and reload the extra + field information (it probably changed), recover last operation if + necessary */ + if (log_check(log) && log_open(log)) + return -1; + + /* create and write .add file */ + strcpy(log->end, ".add"); + fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); + if (fd < 0) + return -1; + ret = (size_t)write(fd, data, len) != len; + if (ret | close(fd)) + return -1; + log_touch(log); + + /* mark log file with append in progress */ + if (log_mark(log, APPEND_OP)) + return -1; + BAIL(8); + + /* append data (clears mark) */ + if (log_append(log, data, len)) + return -1; + + /* check to see if it's time to compress -- if not, then done */ + if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER) + return 0; + + /* time to compress */ + return gzlog_compress(log); +} + +/* gzlog_close() return values: + 0: ok + -3: invalid log pointer argument */ +int gzlog_close(gzlog *logd) +{ + struct log *log = logd; + + /* check arguments */ + if (log == NULL || strcmp(log->id, LOGID)) + return -3; + + /* close the log file and release the lock */ + log_close(log); + + /* free structure and return */ + if (log->path != NULL) + free(log->path); + strcpy(log->id, "bad"); + free(log); + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.h b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.h new file mode 100644 index 0000000..4f05109 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/gzlog.h @@ -0,0 +1,91 @@ +/* gzlog.h + Copyright (C) 2004, 2008, 2012 Mark Adler, all rights reserved + version 2.2, 14 Aug 2012 + + This 
software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* Version History: + 1.0 26 Nov 2004 First version + 2.0 25 Apr 2008 Complete redesign for recovery of interrupted operations + Interface changed slightly in that now path is a prefix + Compression now occurs as needed during gzlog_write() + gzlog_write() now always leaves the log file as valid gzip + 2.1 8 Jul 2012 Fix argument checks in gzlog_compress() and gzlog_write() + 2.2 14 Aug 2012 Clean up signed comparisons + */ + +/* + The gzlog object allows writing short messages to a gzipped log file, + opening the log file locked for small bursts, and then closing it. The log + object works by appending stored (uncompressed) data to the gzip file until + 1 MB has been accumulated. At that time, the stored data is compressed, and + replaces the uncompressed data in the file. The log file is truncated to + its new size at that time. After each write operation, the log file is a + valid gzip file that can be decompressed to recover what was written. + + The gzlog operations can be interrupted at any point due to an application or + system crash, and the log file will be recovered the next time the log is + opened with gzlog_open(). + */ + +#ifndef GZLOG_H +#define GZLOG_H + +/* gzlog object type */ +typedef void gzlog; + +/* Open a gzlog object, creating the log file if it does not exist. Return + NULL on error. Note that gzlog_open() could take a while to complete if it + has to wait to verify that a lock is stale (possibly for five minutes), or + if there is significant contention with other instantiations of this object + when locking the resource. path is the prefix of the file names created by + this object. If path is "foo", then the log file will be "foo.gz", and + other auxiliary files will be created and destroyed during the process: + "foo.dict" for a compression dictionary, "foo.temp" for a temporary (next) + dictionary, "foo.add" for data being added or compressed, "foo.lock" for the + lock file, and "foo.repairs" to log recovery operations performed due to + interrupted gzlog operations. A gzlog_open() followed by a gzlog_close() + will recover a previously interrupted operation, if any. */ +gzlog *gzlog_open(char *path); + +/* Write to a gzlog object. Return zero on success, -1 if there is a file i/o + error on any of the gzlog files (this should not happen if gzlog_open() + succeeded, unless the device has run out of space or leftover auxiliary + files have permissions or ownership that prevent their use), -2 if there is + a memory allocation failure, or -3 if the log argument is invalid (e.g. if + it was not created by gzlog_open()).
This function will write data to the + file uncompressed, until 1 MB has been accumulated, at which time that data + will be compressed. The log file will be a valid gzip file upon successful + return. */ +int gzlog_write(gzlog *log, void *data, size_t len); + +/* Force compression of any uncompressed data in the log. This should be used + sparingly, if at all. The main application would be when a log file will + not be appended to again. If this is used to compress frequently while + appending, it will both significantly increase the execution time and + reduce the compression ratio. The return codes are the same as for + gzlog_write(). */ +int gzlog_compress(gzlog *log); + +/* Close a gzlog object. Return zero on success, -3 if the log argument is + invalid. The log object is freed, and so cannot be referenced again. */ +int gzlog_close(gzlog *log); + +#endif diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/gznorm.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/gznorm.c new file mode 100644 index 0000000..68e0a0f --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/gznorm.c @@ -0,0 +1,470 @@ +/* gznorm.c -- normalize a gzip stream + * Copyright (C) 2018 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.0 7 Oct 2018 Mark Adler */ + +// gznorm takes a gzip stream, potentially containing multiple members, and +// converts it to a gzip stream with a single member. In addition the gzip +// header is normalized, removing the file name and time stamp, and setting the +// other header contents (XFL, OS) to fixed values. gznorm does not recompress +// the data, so it is fast, but no advantage is gained from the history that +// could be available across member boundaries. + +#include <stdio.h> // fread, fwrite, putc, fflush, ferror, fprintf, + // vsnprintf, stdout, stderr, NULL, FILE +#include <stdlib.h> // malloc, free +#include <string.h> // strerror +#include <errno.h> // errno +#include <stdarg.h> // va_list, va_start, va_end +#include "zlib.h" // inflateInit2, inflate, inflateReset, inflateEnd, + // z_stream, z_off_t, crc32_combine, Z_NULL, Z_BLOCK, + // Z_OK, Z_STREAM_END, Z_BUF_ERROR, Z_DATA_ERROR, + // Z_MEM_ERROR + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include <fcntl.h> +# include <io.h> +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#define local static + +// printf to an allocated string. Return the string, or NULL if the printf or +// allocation fails. +local char *aprintf(char *fmt, ...) { + // Get the length of the result of the printf. + va_list args; + va_start(args, fmt); + int len = vsnprintf(NULL, 0, fmt, args); + va_end(args); + if (len < 0) + return NULL; + + // Allocate the required space and printf to it. + char *str = malloc(len + 1); + if (str == NULL) + return NULL; + va_start(args, fmt); + vsnprintf(str, len + 1, fmt, args); + va_end(args); + return str; +} + +// Return with an error, putting an allocated error message in *err. Doing an +// inflateEnd() on an already ended state, or one with state set to Z_NULL, is +// permitted. +#define BYE(...) \ + do { \ + inflateEnd(&strm); \ + *err = aprintf(__VA_ARGS__); \ + return 1; \ + } while (0) + +// Chunk size for buffered reads and for decompression. Twice this many bytes +// will be allocated on the stack by gzip_normalize(). Must fit in an unsigned. +#define CHUNK 16384 + +// Read a gzip stream from in and write an equivalent normalized gzip stream to +// out.
If given no input, an empty gzip stream will be written. If successful, +// 0 is returned, and *err is set to NULL. On error, 1 is returned, where the +// details of the error are returned in *err, a pointer to an allocated string. +// +// The input may be a stream with multiple gzip members, which is converted to +// a single gzip member on the output. Each gzip member is decompressed at the +// level of deflate blocks. This enables clearing the last-block bit, shifting +// the compressed data to concatenate to the previous member's compressed data, +// which can end at an arbitrary bit boundary, and identifying stored blocks in +// order to resynchronize those to byte boundaries. The deflate compressed data +// is terminated with a 10-bit empty fixed block. If any members on the input +// end with a 10-bit empty fixed block, then that block is excised from the +// stream. This avoids appending empty fixed blocks for every normalization, +// and assures that gzip_normalize applied a second time will not change the +// input. The pad bits after stored block headers and after the final deflate +// block are all forced to zeros. +local int gzip_normalize(FILE *in, FILE *out, char **err) { + // initialize the inflate engine to process a gzip member + z_stream strm; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + if (inflateInit2(&strm, 15 + 16) != Z_OK) + BYE("out of memory"); + + // State while processing the input gzip stream. + enum { // BETWEEN -> HEAD -> BLOCK -> TAIL -> BETWEEN -> ... + BETWEEN, // between gzip members (must end in this state) + HEAD, // reading a gzip header + BLOCK, // reading deflate blocks + TAIL // reading a gzip trailer + } state = BETWEEN; // current component being processed + unsigned long crc = 0; // accumulated CRC of uncompressed data + unsigned long len = 0; // accumulated length of uncompressed data + unsigned long buf = 0; // deflate stream bit buffer of num bits + int num = 0; // number of bits in buf (at bottom) + + // Write a canonical gzip header (no mod time, file name, comment, extra + // block, or extra flags, and OS is marked as unknown). + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + + // Process the gzip stream from in until reaching the end of the input, + // encountering invalid input, or experiencing an i/o error. + int more; // true if not at the end of the input + do { + // State inside this loop. + unsigned char *put; // next input buffer location to process + int prev; // number of bits from previous block in + // the bit buffer, or -1 if not at the + // start of a block + unsigned long long memb; // uncompressed length of member + size_t tail; // number of trailer bytes read (0..8) + unsigned long part; // accumulated trailer component + + // Get the next chunk of input from in. + unsigned char dat[CHUNK]; + strm.avail_in = fread(dat, 1, CHUNK, in); + if (strm.avail_in == 0) + break; + more = strm.avail_in == CHUNK; + strm.next_in = put = dat; + + // Run that chunk of input through the inflate engine to exhaustion. + do { + // At this point it is assured that strm.avail_in > 0. + + // Inflate until the end of a gzip component (header, deflate + // block, trailer) is reached, or until all of the chunk is + // consumed. The resulting decompressed data is discarded, though + // the total size of the decompressed data in each member is + // tracked, for the calculation of the total CRC. 
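+ // Note: Z_BLOCK makes inflate() return at the end of the gzip header and at + // each deflate block boundary, reporting in strm.data_type why it stopped.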
+ do { + // inflate and handle any errors + unsigned char scrap[CHUNK]; + strm.avail_out = CHUNK; + strm.next_out = scrap; + int ret = inflate(&strm, Z_BLOCK); + if (ret == Z_MEM_ERROR) + BYE("out of memory"); + if (ret == Z_DATA_ERROR) + BYE("input invalid: %s", strm.msg); + if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_STREAM_END) + BYE("internal error"); + + // Update the number of uncompressed bytes generated in this + // member. The actual count (not modulo 2^32) is required to + // correctly compute the total CRC. + unsigned got = CHUNK - strm.avail_out; + memb += got; + if (memb < got) + BYE("overflow error"); + + // Continue to process this chunk until it is consumed, or + // until the end of a component (header, deflate block, or + // trailer) is reached. + } while (strm.avail_out == 0 && (strm.data_type & 0x80) == 0); + + // Since strm.avail_in was > 0 for the inflate call, some input was + // just consumed. It is therefore assured that put < strm.next_in. + + // Disposition the consumed component or part of a component. + switch (state) { + case BETWEEN: + state = HEAD; + // Fall through to HEAD when some or all of the header is + // processed. + + case HEAD: + // Discard the header. + if (strm.data_type & 0x80) { + // End of header reached -- deflate blocks follow. + put = strm.next_in; + prev = num; + memb = 0; + state = BLOCK; + } + break; + + case BLOCK: + // Copy the deflate stream to the output, but with the + // last-block-bit cleared. Re-synchronize stored block + // headers to the output byte boundaries. The bytes at + // put..strm.next_in-1 is the compressed data that has been + // processed and is ready to be copied to the output. + + // At this point, it is assured that new compressed data is + // available, i.e., put < strm.next_in. If prev is -1, then + // that compressed data starts in the middle of a deflate + // block. If prev is not -1, then the bits in the bit + // buffer, possibly combined with the bits in *put, contain + // the three-bit header of the new deflate block. In that + // case, prev is the number of bits from the previous block + // that remain in the bit buffer. Since num is the number + // of bits in the bit buffer, we have that num - prev is + // the number of bits from the new block currently in the + // bit buffer. + + // If strm.data_type & 0xc0 is 0x80, then the last byte of + // the available compressed data includes the last bits of + // the end of a deflate block. In that case, that last byte + // also has strm.data_type & 0x1f bits of the next deflate + // block, in the range 0..7. If strm.data_type & 0xc0 is + // 0xc0, then the last byte of the compressed data is the + // end of the deflate stream, followed by strm.data_type & + // 0x1f pad bits, also in the range 0..7. + + // Set bits to the number of bits not yet consumed from the + // last byte. If we are at the end of the block, bits is + // either the number of bits in the last byte belonging to + // the next block, or the number of pad bits after the + // final block. In either of those cases, bits is in the + // range 0..7. + ; // (required due to C syntax oddity) + int bits = strm.data_type & 0x1f; + + if (prev != -1) { + // We are at the start of a new block. Clear the last + // block bit, and check for special cases. If it is a + // stored block, then emit the header and pad to the + // next byte boundary. If it is a final, empty fixed + // block, then excise it. + + // Some or all of the three header bits for this block + // may already be in the bit buffer. 
Load any remaining + // header bits into the bit buffer. + if (num - prev < 3) { + buf += (unsigned long)*put++ << num; + num += 8; + } + + // Set last to have a 1 in the position of the last + // block bit in the bit buffer. + unsigned long last = (unsigned long)1 << prev; + + if (((buf >> prev) & 7) == 3) { + // This is a final fixed block. Load at least ten + // bits from this block, including the header, into + // the bit buffer. We already have at least three, + // so at most one more byte needs to be loaded. + if (num - prev < 10) { + if (put == strm.next_in) + // Need to go get and process more input. + // We'll end up back here to finish this. + break; + buf += (unsigned long)*put++ << num; + num += 8; + } + if (((buf >> prev) & 0x3ff) == 3) { + // That final fixed block is empty. Delete it + // to avoid adding an empty block every time a + // gzip stream is normalized. + num = prev; + buf &= last - 1; // zero the pad bits + } + } + else if (((buf >> prev) & 6) == 0) { + // This is a stored block. Flush to the next + // byte boundary after the three-bit header. + num = (prev + 10) & ~7; + buf &= last - 1; // zero the pad bits + } + + // Clear the last block bit. + buf &= ~last; + + // Write out complete bytes in the bit buffer. + while (num >= 8) { + putc(buf, out); + buf >>= 8; + num -= 8; + } + + // If no more bytes left to process, then we have + // consumed the byte that had bits from the next block. + if (put == strm.next_in) + bits = 0; + } + + // We are done handling the deflate block header. Now copy + // all or almost all of the remaining compressed data that + // has been processed so far. Don't copy one byte at the + // end if it contains bits from the next deflate block or + // pad bits at the end of a deflate block. + + // mix is 1 if we are at the end of a deflate block, and if + // some of the bits in the last byte follow this block. mix + // is 0 if we are in the middle of a deflate block, if the + // deflate block ended on a byte boundary, or if all of the + // compressed data processed so far has been consumed. + int mix = (strm.data_type & 0x80) && bits; + + // Copy all of the processed compressed data to the output, + // except for the last byte if it contains bits from the + // next deflate block or pad bits at the end of the deflate + // stream. Copy the data after shifting in num bits from + // buf in front of it, leaving num bits from the end of the + // compressed data in buf when done. + unsigned char *end = strm.next_in - mix; + if (put < end) { + if (num) + // Insert num bits from buf before the data being + // copied. + do { + buf += (unsigned)(*put++) << num; + putc(buf, out); + buf >>= 8; + } while (put < end); + else { + // No shifting needed -- write directly. + fwrite(put, 1, end - put, out); + put = end; + } + } + + // Process the last processed byte if it wasn't written. + if (mix) { + // Load the last byte into the bit buffer. + buf += (unsigned)(*put++) << num; + num += 8; + + if (strm.data_type & 0x40) { + // We are at the end of the deflate stream and + // there are bits pad bits. Discard the pad bits + // and write a byte to the output, if available. + // Leave the num bits left over in buf to prepend + // to the next deflate stream. + num -= bits; + if (num >= 8) { + putc(buf, out); + num -= 8; + buf >>= 8; + } + + // Force the pad bits in the bit buffer to zeros. + buf &= ((unsigned long)1 << num) - 1; + + // Don't need to set prev here since going to TAIL. + } + else + // At the end of an internal deflate block. 
Leave + // the last byte in the bit buffer to examine on + // the next entry to BLOCK, when more bits from the + // next block will be available. + prev = num - bits; // number of bits in buffer + // from current block + } + + // Don't have a byte left over, so we are in the middle of + // a deflate block, or the deflate block ended on a byte + // boundary. Set prev appropriately for the next entry into + // BLOCK. + else if (strm.data_type & 0x80) + // The block ended on a byte boundary, so no header + // bits are in the bit buffer. + prev = num; + else + // In the middle of a deflate block, so no header here. + prev = -1; + + // Check for the end of the deflate stream. + if ((strm.data_type & 0xc0) == 0xc0) { + // That ends the deflate stream on the input side, the + // pad bits were discarded, and any remaining bits from + // the last block in the stream are saved in the bit + // buffer to prepend to the next stream. Process the + // gzip trailer next. + tail = 0; + part = 0; + state = TAIL; + } + break; + + case TAIL: + // Accumulate available trailer bytes to update the total + // CRC and the total uncompressed length. + do { + part = (part >> 8) + ((unsigned long)(*put++) << 24); + tail++; + if (tail == 4) { + // Update the total CRC. + z_off_t len2 = memb; + if (len2 < 0 || (unsigned long long)len2 != memb) + BYE("overflow error"); + crc = crc ? crc32_combine(crc, part, len2) : part; + part = 0; + } + else if (tail == 8) { + // Update the total uncompressed length. (It's ok + // if this sum is done modulo 2^32.) + len += part; + + // At the end of a member. Set up to inflate an + // immediately following gzip member. (If we made + // it this far, then the trailer was valid.) + if (inflateReset(&strm) != Z_OK) + BYE("internal error"); + state = BETWEEN; + break; + } + } while (put < strm.next_in); + break; + } + + // Process the input buffer until completely consumed. + } while (strm.avail_in > 0); + + // Process input until end of file, invalid input, or i/o error. + } while (more); + + // Done with the inflate engine. + inflateEnd(&strm); + + // Verify the validity of the input. + if (state != BETWEEN) + BYE("input invalid: incomplete gzip stream"); + + // Write the remaining deflate stream bits, followed by a terminating + // deflate fixed block. + buf += (unsigned long)3 << num; + putc(buf, out); + putc(buf >> 8, out); + if (num > 6) + putc(0, out); + + // Write the gzip trailer, which is the CRC and the uncompressed length + // modulo 2^32, both in little-endian order. + putc(crc, out); + putc(crc >> 8, out); + putc(crc >> 16, out); + putc(crc >> 24, out); + putc(len, out); + putc(len >> 8, out); + putc(len >> 16, out); + putc(len >> 24, out); + fflush(out); + + // Check for any i/o errors. + if (ferror(in) || ferror(out)) + BYE("i/o error: %s", strerror(errno)); + + // All good! + *err = NULL; + return 0; +} + +// Normalize the gzip stream on stdin, writing the result to stdout. +int main(void) { + // Avoid end-of-line conversions on evil operating systems. + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + // Normalize from stdin to stdout, returning 1 on error, 0 if ok. 
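+ // (On success, gzip_normalize() sets err to NULL, so the unconditional + // free(err) below is a harmless free(NULL).)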
+    char *err;
+    int ret = gzip_normalize(stdin, stdout, &err);
+    if (ret)
+        fprintf(stderr, "gznorm error: %s\n", err);
+    free(err);
+    return ret;
+}
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/zpipe.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/zpipe.c
new file mode 100644
index 0000000..83535d1
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/zpipe.c
@@ -0,0 +1,205 @@
+/* zpipe.c: example of proper use of zlib's inflate() and deflate()
+   Not copyrighted -- provided to the public domain
+   Version 1.4  11 December 2005  Mark Adler */
+
+/* Version history:
+   1.0  30 Oct 2004  First version
+   1.1   8 Nov 2004  Add void casting for unused return values
+                     Use switch statement for inflate() return values
+   1.2   9 Nov 2004  Add assertions to document zlib guarantees
+   1.3   6 Apr 2005  Remove incorrect assertion in inf()
+   1.4  11 Dec 2005  Add hack to avoid MSDOS end-of-line conversions
+                     Avoid some compiler warnings for input and output buffers
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "zlib.h"
+
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
+#  include <fcntl.h>
+#  include <io.h>
+#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+#else
+#  define SET_BINARY_MODE(file)
+#endif
+
+#define CHUNK 16384
+
+/* Compress from file source to file dest until EOF on source.
+   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+   allocated for processing, Z_STREAM_ERROR if an invalid compression
+   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
+   version of the library linked do not match, or Z_ERRNO if there is
+   an error reading or writing the files. */
+int def(FILE *source, FILE *dest, int level)
+{
+    int ret, flush;
+    unsigned have;
+    z_stream strm;
+    unsigned char in[CHUNK];
+    unsigned char out[CHUNK];
+
+    /* allocate deflate state */
+    strm.zalloc = Z_NULL;
+    strm.zfree = Z_NULL;
+    strm.opaque = Z_NULL;
+    ret = deflateInit(&strm, level);
+    if (ret != Z_OK)
+        return ret;
+
+    /* compress until end of file */
+    do {
+        strm.avail_in = fread(in, 1, CHUNK, source);
+        if (ferror(source)) {
+            (void)deflateEnd(&strm);
+            return Z_ERRNO;
+        }
+        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
+        strm.next_in = in;
+
+        /* run deflate() on input until output buffer not full, finish
+           compression if all of source has been read in */
+        do {
+            strm.avail_out = CHUNK;
+            strm.next_out = out;
+            ret = deflate(&strm, flush);    /* no bad return value */
+            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+            have = CHUNK - strm.avail_out;
+            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
+                (void)deflateEnd(&strm);
+                return Z_ERRNO;
+            }
+        } while (strm.avail_out == 0);
+        assert(strm.avail_in == 0);     /* all input will be used */
+
+        /* done when last data in file processed */
+    } while (flush != Z_FINISH);
+    assert(ret == Z_STREAM_END);        /* stream will be complete */
+
+    /* clean up and return */
+    (void)deflateEnd(&strm);
+    return Z_OK;
+}
+
+/* Decompress from file source to file dest until stream ends or EOF.
+   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
+   allocated for processing, Z_DATA_ERROR if the deflate data is
+   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
+   the version of the library linked do not match, or Z_ERRNO if there
+   is an error reading or writing the files.
*/ +int inf(FILE *source, FILE *dest) +{ + int ret; + unsigned have; + z_stream strm; + unsigned char in[CHUNK]; + unsigned char out[CHUNK]; + + /* allocate inflate state */ + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); + if (ret != Z_OK) + return ret; + + /* decompress until deflate stream ends or end of file */ + do { + strm.avail_in = fread(in, 1, CHUNK, source); + if (ferror(source)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + if (strm.avail_in == 0) + break; + strm.next_in = in; + + /* run inflate() on input until output buffer not full */ + do { + strm.avail_out = CHUNK; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); + assert(ret != Z_STREAM_ERROR); /* state not clobbered */ + switch (ret) { + case Z_NEED_DICT: + ret = Z_DATA_ERROR; /* and fall through */ + case Z_DATA_ERROR: + case Z_MEM_ERROR: + (void)inflateEnd(&strm); + return ret; + } + have = CHUNK - strm.avail_out; + if (fwrite(out, 1, have, dest) != have || ferror(dest)) { + (void)inflateEnd(&strm); + return Z_ERRNO; + } + } while (strm.avail_out == 0); + + /* done when inflate() says it's done */ + } while (ret != Z_STREAM_END); + + /* clean up and return */ + (void)inflateEnd(&strm); + return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; +} + +/* report a zlib or i/o error */ +void zerr(int ret) +{ + fputs("zpipe: ", stderr); + switch (ret) { + case Z_ERRNO: + if (ferror(stdin)) + fputs("error reading stdin\n", stderr); + if (ferror(stdout)) + fputs("error writing stdout\n", stderr); + break; + case Z_STREAM_ERROR: + fputs("invalid compression level\n", stderr); + break; + case Z_DATA_ERROR: + fputs("invalid or incomplete deflate data\n", stderr); + break; + case Z_MEM_ERROR: + fputs("out of memory\n", stderr); + break; + case Z_VERSION_ERROR: + fputs("zlib version mismatch!\n", stderr); + } +} + +/* compress or decompress from stdin to stdout */ +int main(int argc, char **argv) +{ + int ret; + + /* avoid end-of-line conversions */ + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + + /* do compression if no arguments */ + if (argc == 1) { + ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* do decompression if -d specified */ + else if (argc == 2 && strcmp(argv[1], "-d") == 0) { + ret = inf(stdin, stdout); + if (ret != Z_OK) + zerr(ret); + return ret; + } + + /* otherwise, report usage */ + else { + fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); + return 1; + } +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/zran.c b/c-blosc/internal-complibs/zlib-1.3.1/examples/zran.c new file mode 100644 index 0000000..d313595 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/zran.c @@ -0,0 +1,533 @@ +/* zran.c -- example of deflate stream indexing and random access + * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + * Version 1.4 13 Apr 2023 Mark Adler */ + +/* Version History: + 1.0 29 May 2005 First version + 1.1 29 Sep 2012 Fix memory reallocation error + 1.2 14 Oct 2018 Handle gzip streams with multiple members + Add a header file to facilitate usage in applications + 1.3 18 Feb 2023 Permit raw deflate streams as well as zlib and gzip + Permit crossing gzip member boundaries when extracting + Support a size_t size when extracting (was an int) + Do a binary search over the index for an access point + Expose the access point type to enable save 
and load
+   1.4  13 Apr 2023  Add a NOPRIME define to not use inflatePrime()
+ */
+
+// Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
+// for random access of a compressed file. A file containing a raw deflate
+// stream is provided on the command line. The compressed stream is decoded in
+// its entirety, and an index built with access points about every SPAN bytes
+// in the uncompressed output. The compressed file is left open, and can then
+// be read randomly, having to decompress on the average SPAN/2 uncompressed
+// bytes before getting to the desired block of data.
+//
+// An access point can be created at the start of any deflate block, by saving
+// the starting file offset and bit of that block, and the 32K bytes of
+// uncompressed data that precede that block. Also the uncompressed offset of
+// that block is saved to provide a reference for locating a desired starting
+// point in the uncompressed stream. deflate_index_build() decompresses the
+// input raw deflate stream a block at a time, and at the end of each block
+// decides if enough uncompressed data has gone by to justify the creation of a
+// new access point. If so, that point is saved in a data structure that grows
+// as needed to accommodate the points.
+//
+// To use the index, an offset in the uncompressed data is provided, for which
+// the latest access point at or preceding that offset is located in the index.
+// The input file is positioned to the specified location in the index, and if
+// necessary the first few bits of the compressed data is read from the file.
+// inflate is initialized with those bits and the 32K of uncompressed data, and
+// decompression then proceeds until the desired offset in the file is reached.
+// Then decompression continues to read the requested uncompressed data from
+// the file.
+//
+// There is some fair bit of overhead to starting inflation for the random
+// access, mainly copying the 32K byte dictionary. If small pieces of the file
+// are being accessed, it would make sense to implement a cache to hold some
+// lookahead to avoid many calls to deflate_index_extract() for small lengths.
+//
+// Another way to build an index would be to use inflateCopy(). That would not
+// be constrained to have access points at block boundaries, but would require
+// more memory per access point, and could not be saved to a file due to the
+// use of pointers in the state. The approach here allows for storage of the
+// index in a file.
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "zlib.h"
+#include "zran.h"
+
+#define WINSIZE 32768U      // sliding window size
+#define CHUNK 16384         // file input buffer size
+
+// See comments in zran.h.
+void deflate_index_free(struct deflate_index *index) {
+    if (index != NULL) {
+        free(index->list);
+        free(index);
+    }
+}
+
+// Add an access point to the list. If out of memory, deallocate the existing
+// list and return NULL. index->mode is temporarily the allocated number of
+// access points, until it is time for deflate_index_build() to return. Then
+// index->mode is set to the mode of inflation.
+static struct deflate_index *add_point(struct deflate_index *index, int bits,
+                                       off_t in, off_t out, unsigned left,
+                                       unsigned char *window) {
+    if (index == NULL) {
+        // The list is empty. Create it, starting with eight access points.
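
An aside on the allocation strategy of the surrounding add_point(): the list starts small and doubles with realloc() whenever it fills, giving amortized constant time per append, and everything is freed if an allocation fails. A reduced, standalone sketch of the same pattern (the names vec_t and grow_append are illustrative, not part of zran.c):

#include <stdlib.h>

typedef struct {
    int *data;      /* growable array */
    size_t have;    /* elements in use */
    size_t size;    /* elements allocated */
} vec_t;

/* Append value to v, growing by doubling. Return 0, or -1 on failure
   (in which case the vector is freed, as add_point() frees the index). */
static int grow_append(vec_t *v, int value) {
    if (v->size == 0) {                 /* first use -- start small */
        v->size = 8;
        v->data = malloc(v->size * sizeof(int));
        if (v->data == NULL)
            return -1;
    }
    else if (v->have == v->size) {      /* full -- double the allocation */
        int *next = realloc(v->data, (v->size << 1) * sizeof(int));
        if (next == NULL) {
            free(v->data);
            *v = (vec_t){0};
            return -1;
        }
        v->data = next;
        v->size <<= 1;
    }
    v->data[v->have++] = value;
    return 0;
}

add_point() follows the same shape, temporarily reusing index->mode as the allocated capacity so no extra field is needed during the build.
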
+ index = malloc(sizeof(struct deflate_index)); + if (index == NULL) + return NULL; + index->have = 0; + index->mode = 8; + index->list = malloc(sizeof(point_t) * index->mode); + if (index->list == NULL) { + free(index); + return NULL; + } + } + + else if (index->have == index->mode) { + // The list is full. Make it bigger. + index->mode <<= 1; + point_t *next = realloc(index->list, sizeof(point_t) * index->mode); + if (next == NULL) { + deflate_index_free(index); + return NULL; + } + index->list = next; + } + + // Fill in the access point and increment how many we have. + point_t *next = (point_t *)(index->list) + index->have++; + if (index->have < 0) { + // Overflowed the int! + deflate_index_free(index); + return NULL; + } + next->out = out; + next->in = in; + next->bits = bits; + if (left) + memcpy(next->window, window + WINSIZE - left, left); + if (left < WINSIZE) + memcpy(next->window + left, window, WINSIZE - left); + + // Return the index, which may have been newly allocated or destroyed. + return index; +} + +// Decompression modes. These are the inflateInit2() windowBits parameter. +#define RAW -15 +#define ZLIB 15 +#define GZIP 31 + +// See comments in zran.h. +int deflate_index_build(FILE *in, off_t span, struct deflate_index **built) { + // Set up inflation state. + z_stream strm = {0}; // inflate engine (gets fired up later) + unsigned char buf[CHUNK]; // input buffer + unsigned char win[WINSIZE] = {0}; // output sliding window + off_t totin = 0; // total bytes read from input + off_t totout = 0; // total bytes uncompressed + int mode = 0; // mode: RAW, ZLIB, or GZIP (0 => not set yet) + + // Decompress from in, generating access points along the way. + int ret; // the return value from zlib, or Z_ERRNO + off_t last; // last access point uncompressed offset + struct deflate_index *index = NULL; // list of access points + do { + // Assure available input, at least until reaching EOF. + if (strm.avail_in == 0) { + strm.avail_in = fread(buf, 1, sizeof(buf), in); + totin += strm.avail_in; + strm.next_in = buf; + if (strm.avail_in < sizeof(buf) && ferror(in)) { + ret = Z_ERRNO; + break; + } + + if (mode == 0) { + // At the start of the input -- determine the type. Assume raw + // if it is neither zlib nor gzip. This could in theory result + // in a false positive for zlib, but in practice the fill bits + // after a stored block are always zeros, so a raw stream won't + // start with an 8 in the low nybble. + mode = strm.avail_in == 0 ? RAW : // empty -- will fail + (strm.next_in[0] & 0xf) == 8 ? ZLIB : + strm.next_in[0] == 0x1f ? GZIP : + /* else */ RAW; + ret = inflateInit2(&strm, mode); + if (ret != Z_OK) + break; + } + } + + // Assure available output. This rotates the output through, for use as + // a sliding window on the uncompressed data. + if (strm.avail_out == 0) { + strm.avail_out = sizeof(win); + strm.next_out = win; + } + + if (mode == RAW && index == NULL) + // We skip the inflate() call at the start of raw deflate data in + // order generate an access point there. Set data_type to imitate + // the end of a header. + strm.data_type = 0x80; + else { + // Inflate and update the number of uncompressed bytes. + unsigned before = strm.avail_out; + ret = inflate(&strm, Z_BLOCK); + totout += before - strm.avail_out; + } + + if ((strm.data_type & 0xc0) == 0x80 && + (index == NULL || totout - last >= span)) { + // We are at the end of a header or a non-last deflate block, so we + // can add an access point here. 
Furthermore, we are either at the + // very start for the first access point, or there has been span or + // more uncompressed bytes since the last access point, so we want + // to add an access point here. + index = add_point(index, strm.data_type & 7, totin - strm.avail_in, + totout, strm.avail_out, win); + if (index == NULL) { + ret = Z_MEM_ERROR; + break; + } + last = totout; + } + + if (ret == Z_STREAM_END && mode == GZIP && + (strm.avail_in || ungetc(getc(in), in) != EOF)) + // There is more input after the end of a gzip member. Reset the + // inflate state to read another gzip member. On success, this will + // set ret to Z_OK to continue decompressing. + ret = inflateReset2(&strm, GZIP); + + // Keep going until Z_STREAM_END or error. If the compressed data ends + // prematurely without a file read error, Z_BUF_ERROR is returned. + } while (ret == Z_OK); + inflateEnd(&strm); + + if (ret != Z_STREAM_END) { + // An error was encountered. Discard the index and return a negative + // error code. + deflate_index_free(index); + return ret == Z_NEED_DICT ? Z_DATA_ERROR : ret; + } + + // Shrink the index to only the occupied access points and return it. + index->mode = mode; + index->length = totout; + point_t *list = realloc(index->list, sizeof(point_t) * index->have); + if (list == NULL) { + // Seems like a realloc() to make something smaller should always work, + // but just in case. + deflate_index_free(index); + return Z_MEM_ERROR; + } + index->list = list; + *built = index; + return index->have; +} + +#ifdef NOPRIME +// Support zlib versions before 1.2.3 (July 2005), or incomplete zlib clones +// that do not have inflatePrime(). + +# define INFLATEPRIME inflatePreface + +// Append the low bits bits of value to in[] at bit position *have, updating +// *have. value must be zero above its low bits bits. bits must be positive. +// This assumes that any bits above the *have bits in the last byte are zeros. +// That assumption is preserved on return, as any bits above *have + bits in +// the last byte written will be set to zeros. +static inline void append_bits(unsigned value, int bits, + unsigned char *in, int *have) { + in += *have >> 3; // where the first bits from value will go + int k = *have & 7; // the number of bits already there + *have += bits; + if (k) + *in |= value << k; // write value above the low k bits + else + *in = value; + k = 8 - k; // the number of bits just appended + while (bits > k) { + value >>= k; // drop the bits appended + bits -= k; + k = 8; // now at a byte boundary + *++in = value; + } +} + +// Insert enough bits in the form of empty deflate blocks in front of the +// low bits bits of value, in order to bring the sequence to a byte boundary. +// Then feed that to inflate(). This does what inflatePrime() does, except that +// a negative value of bits is not supported. bits must be in 0..16. If the +// arguments are invalid, Z_STREAM_ERROR is returned. Otherwise the return +// value from inflate() is returned. +static int inflatePreface(z_stream *strm, int bits, int value) { + // Check input. + if (strm == Z_NULL || bits < 0 || bits > 16) + return Z_STREAM_ERROR; + if (bits == 0) + return Z_OK; + value &= (2 << (bits - 1)) - 1; + + // An empty dynamic block with an odd number of bits (95). The high bit of + // the last byte is unused. 
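
The dyn[] table that follows encodes exactly such a block; as an aside, the bit order append_bits() implements can be seen in isolation. Deflate fills each byte starting at the least significant bit, so each new value is shifted up past the bits already present. A standalone sketch under that assumption (buffer size and sample values are arbitrary):

#include <stdio.h>

int main(void) {
    unsigned char out[4] = {0};
    int have = 0;                       /* total bits written so far */
    unsigned vals[] = {5, 2, 9};        /* values to pack */
    int lens[] = {3, 2, 4};             /* bit length of each value */
    for (int i = 0; i < 3; i++)
        for (int b = 0; b < lens[i]; b++, have++)
            if ((vals[i] >> b) & 1)     /* place bit LSB-first */
                out[have >> 3] |= 1 << (have & 7);
    printf("%d bits -> %02x %02x\n", have, out[0], out[1]);
    return 0;
}

Running it packs 3+2+4 = 9 bits and prints "9 bits -> 35 01", the same layout append_bits() produces a byte at a time.
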
+ static const unsigned char dyn[] = { + 4, 0xe0, 0x81, 8, 0, 0, 0, 0, 0x20, 0xa8, 0xab, 0x1f + }; + const int dynlen = 95; // number of bits in the block + + // Build an input buffer for inflate that is a multiple of eight bits in + // length, and that ends with the low bits bits of value. + unsigned char in[(dynlen + 3 * 10 + 16 + 7) / 8]; + int have = 0; + if (bits & 1) { + // Insert an empty dynamic block to get to an odd number of bits, so + // when bits bits from value are appended, we are at an even number of + // bits. + memcpy(in, dyn, sizeof(dyn)); + have = dynlen; + } + while ((have + bits) & 7) + // Insert empty fixed blocks until appending bits bits would put us on + // a byte boundary. This will insert at most three fixed blocks. + append_bits(2, 10, in, &have); + + // Append the bits bits from value, which takes us to a byte boundary. + append_bits(value, bits, in, &have); + + // Deliver the input to inflate(). There is no output space provided, but + // inflate() can't get stuck waiting on output not ingesting all of the + // provided input. The reason is that there will be at most 16 bits of + // input from value after the empty deflate blocks (which themselves + // generate no output). At least ten bits are needed to generate the first + // output byte from a fixed block. The last two bytes of the buffer have to + // be ingested in order to get ten bits, which is the most that value can + // occupy. + strm->avail_in = have >> 3; + strm->next_in = in; + strm->avail_out = 0; + strm->next_out = in; // not used, but can't be NULL + return inflate(strm, Z_NO_FLUSH); +} + +#else +# define INFLATEPRIME inflatePrime +#endif + +// See comments in zran.h. +ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index, + off_t offset, unsigned char *buf, size_t len) { + // Do a quick sanity check on the index. + if (index == NULL || index->have < 1 || index->list[0].out != 0) + return Z_STREAM_ERROR; + + // If nothing to extract, return zero bytes extracted. + if (len == 0 || offset < 0 || offset >= index->length) + return 0; + + // Find the access point closest to but not after offset. + int lo = -1, hi = index->have; + point_t *point = index->list; + while (hi - lo > 1) { + int mid = (lo + hi) >> 1; + if (offset < point[mid].out) + hi = mid; + else + lo = mid; + } + point += lo; + + // Initialize the input file and prime the inflate engine to start there. + int ret = fseeko(in, point->in - (point->bits ? 1 : 0), SEEK_SET); + if (ret == -1) + return Z_ERRNO; + int ch = 0; + if (point->bits && (ch = getc(in)) == EOF) + return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; + z_stream strm = {0}; + ret = inflateInit2(&strm, RAW); + if (ret != Z_OK) + return ret; + if (point->bits) + INFLATEPRIME(&strm, point->bits, ch >> (8 - point->bits)); + inflateSetDictionary(&strm, point->window, WINSIZE); + + // Skip uncompressed bytes until offset reached, then satisfy request. + unsigned char input[CHUNK]; + unsigned char discard[WINSIZE]; + offset -= point->out; // number of bytes to skip to get to offset + size_t left = len; // number of bytes left to read after offset + do { + if (offset) { + // Discard up to offset uncompressed bytes. + strm.avail_out = offset < WINSIZE ? (unsigned)offset : WINSIZE; + strm.next_out = discard; + } + else { + // Uncompress up to left bytes into buf. + strm.avail_out = left < UINT_MAX ? (unsigned)left : UINT_MAX; + strm.next_out = buf + len - left; + } + + // Uncompress, setting got to the number of bytes uncompressed. 
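
An aside on the access-point lookup a few lines above: the lo/hi loop is a predecessor binary search, maintaining the invariant list[lo].out <= offset < list[hi].out with lo starting at -1 and hi at the list length. The same idea reduced to a standalone sketch (predecessor() and the sample offsets are hypothetical):

#include <stdio.h>

/* Return the index of the last element <= target, or -1 if none.
   list[] must be sorted in ascending order. */
static int predecessor(const long *list, int n, long target) {
    int lo = -1, hi = n;
    while (hi - lo > 1) {
        int mid = (lo + hi) >> 1;
        if (target < list[mid])
            hi = mid;
        else
            lo = mid;
    }
    return lo;
}

int main(void) {
    const long outs[] = {0, 1048576, 2097152, 3145728};
    printf("%d\n", predecessor(outs, 4, 1500000));  /* prints 1 */
    return 0;
}

Because deflate_index_build() always records an access point at uncompressed offset 0, the search in deflate_index_extract() cannot come back with lo == -1 once the earlier offset checks have passed.
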
+ if (strm.avail_in == 0) { + // Assure available input. + strm.avail_in = fread(input, 1, CHUNK, in); + if (strm.avail_in < CHUNK && ferror(in)) { + ret = Z_ERRNO; + break; + } + strm.next_in = input; + } + unsigned got = strm.avail_out; + ret = inflate(&strm, Z_NO_FLUSH); + got -= strm.avail_out; + + // Update the appropriate count. + if (offset) + offset -= got; + else + left -= got; + + // If we're at the end of a gzip member and there's more to read, + // continue to the next gzip member. + if (ret == Z_STREAM_END && index->mode == GZIP) { + // Discard the gzip trailer. + unsigned drop = 8; // length of gzip trailer + if (strm.avail_in >= drop) { + strm.avail_in -= drop; + strm.next_in += drop; + } + else { + // Read and discard the remainder of the gzip trailer. + drop -= strm.avail_in; + strm.avail_in = 0; + do { + if (getc(in) == EOF) + // The input does not have a complete trailer. + return ferror(in) ? Z_ERRNO : Z_BUF_ERROR; + } while (--drop); + } + + if (strm.avail_in || ungetc(getc(in), in) != EOF) { + // There's more after the gzip trailer. Use inflate to skip the + // gzip header and resume the raw inflate there. + inflateReset2(&strm, GZIP); + do { + if (strm.avail_in == 0) { + strm.avail_in = fread(input, 1, CHUNK, in); + if (strm.avail_in < CHUNK && ferror(in)) { + ret = Z_ERRNO; + break; + } + strm.next_in = input; + } + strm.avail_out = WINSIZE; + strm.next_out = discard; + ret = inflate(&strm, Z_BLOCK); // stop at end of header + } while (ret == Z_OK && (strm.data_type & 0x80) == 0); + if (ret != Z_OK) + break; + inflateReset2(&strm, RAW); + } + } + + // Continue until we have the requested data, the deflate data has + // ended, or an error is encountered. + } while (ret == Z_OK && left); + inflateEnd(&strm); + + // Return the number of uncompressed bytes read into buf, or the error. + return ret == Z_OK || ret == Z_STREAM_END ? len - left : ret; +} + +#ifdef TEST + +#define SPAN 1048576L // desired distance between access points +#define LEN 16384 // number of bytes to extract + +// Demonstrate the use of deflate_index_build() and deflate_index_extract() by +// processing the file provided on the command line, and extracting LEN bytes +// from 2/3rds of the way through the uncompressed output, writing that to +// stdout. An offset can be provided as the second argument, in which case the +// data is extracted from there instead. +int main(int argc, char **argv) { + // Open the input file. + if (argc < 2 || argc > 3) { + fprintf(stderr, "usage: zran file.raw [offset]\n"); + return 1; + } + FILE *in = fopen(argv[1], "rb"); + if (in == NULL) { + fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); + return 1; + } + + // Get optional offset. + off_t offset = -1; + if (argc == 3) { + char *end; + offset = strtoll(argv[2], &end, 10); + if (*end || offset < 0) { + fprintf(stderr, "zran: %s is not a valid offset\n", argv[2]); + return 1; + } + } + + // Build index. 
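
The TEST driver below exercises the two-call pattern: build the index once, then extract from arbitrary offsets. Condensed into a hypothetical library-style helper (dump_range is not part of zran.c; it assumes only the zran.h API above):

#include <stddef.h>
#include <stdio.h>
#include "zran.h"

/* Print up to want bytes starting at uncompressed offset off. Returns 0
   on success. A real caller would keep the index around between calls. */
static int dump_range(const char *name, off_t off, size_t want) {
    FILE *in = fopen(name, "rb");
    if (in == NULL)
        return 1;
    struct deflate_index *index = NULL;
    if (deflate_index_build(in, 1048576L, &index) < 0) {
        fclose(in);
        return 1;
    }
    unsigned char buf[16384];
    if (want > sizeof(buf))
        want = sizeof(buf);
    ptrdiff_t got = deflate_index_extract(in, index, off, buf, want);
    if (got > 0)
        fwrite(buf, 1, (size_t)got, stdout);
    deflate_index_free(index);
    fclose(in);
    return got < 0;
}
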
+    struct deflate_index *index = NULL;
+    int len = deflate_index_build(in, SPAN, &index);
+    if (len < 0) {
+        fclose(in);
+        switch (len) {
+        case Z_MEM_ERROR:
+            fprintf(stderr, "zran: out of memory\n");
+            break;
+        case Z_BUF_ERROR:
+            fprintf(stderr, "zran: %s ended prematurely\n", argv[1]);
+            break;
+        case Z_DATA_ERROR:
+            fprintf(stderr, "zran: compressed data error in %s\n", argv[1]);
+            break;
+        case Z_ERRNO:
+            fprintf(stderr, "zran: read error on %s\n", argv[1]);
+            break;
+        default:
+            fprintf(stderr, "zran: error %d while building index\n", len);
+        }
+        return 1;
+    }
+    fprintf(stderr, "zran: built index with %d access points\n", len);
+
+    // Use index by reading some bytes from an arbitrary offset.
+    unsigned char buf[LEN];
+    if (offset == -1)
+        offset = ((index->length + 1) << 1) / 3;
+    ptrdiff_t got = deflate_index_extract(in, index, offset, buf, LEN);
+    if (got < 0)
+        fprintf(stderr, "zran: extraction failed: %s error\n",
+                got == Z_MEM_ERROR ? "out of memory" : "input corrupted");
+    else {
+        fwrite(buf, 1, got, stdout);
+        fprintf(stderr, "zran: extracted %ld bytes at %lld\n", got, offset);
+    }
+
+    // Clean up and exit.
+    deflate_index_free(index);
+    fclose(in);
+    return 0;
+}
+
+#endif
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/examples/zran.h b/c-blosc/internal-complibs/zlib-1.3.1/examples/zran.h
new file mode 100644
index 0000000..ebf780d
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/examples/zran.h
@@ -0,0 +1,51 @@
+/* zran.h -- example of deflated stream indexing and random access
+ * Copyright (C) 2005, 2012, 2018, 2023 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ * Version 1.3  18 Feb 2023  Mark Adler */
+
+#include <stdio.h>
+#include "zlib.h"
+
+// Access point.
+typedef struct point {
+    off_t out;          // offset in uncompressed data
+    off_t in;           // offset in compressed file of first full byte
+    int bits;           // 0, or number of bits (1-7) from byte at in-1
+    unsigned char window[32768];    // preceding 32K of uncompressed data
+} point_t;
+
+// Access point list.
+struct deflate_index {
+    int have;           // number of access points in list
+    int mode;           // -15 for raw, 15 for zlib, or 31 for gzip
+    off_t length;       // total length of uncompressed data
+    point_t *list;      // allocated list of access points
+};
+
+// Make one pass through a zlib, gzip, or raw deflate compressed stream and
+// build an index, with access points about every span bytes of uncompressed
+// output. gzip files with multiple members are fully indexed. span should be
+// chosen to balance the speed of random access against the memory requirements
+// of the list, which is about 32K bytes per access point. The return value is
+// the number of access points on success (>= 1), Z_MEM_ERROR for out of
+// memory, Z_BUF_ERROR for a premature end of input, Z_DATA_ERROR for a format
+// or verification error in the input file, or Z_ERRNO for a file read error.
+// On success, *built points to the resulting index.
int deflate_index_build(FILE *in, off_t span, struct deflate_index **built);
+
+// Use the index to read len bytes from offset into buf. Return the number of
+// bytes read or a negative error code. If data is requested past the end of
+// the uncompressed data, then deflate_index_extract() will return a value less
+// than len, indicating how much was actually read into buf. If given a valid
+// index, this function should not return an error unless the file was modified
+// somehow since the index was generated, given that deflate_index_build() had
+// validated all of the input. If nevertheless there is a failure, Z_BUF_ERROR
+// is returned if the compressed data ends prematurely, Z_DATA_ERROR if the
+// deflate compressed data is not valid, Z_MEM_ERROR if out of memory,
+// Z_STREAM_ERROR if the index is not valid, or Z_ERRNO if there is an error
+// reading or seeking on the input file.
+ptrdiff_t deflate_index_extract(FILE *in, struct deflate_index *index,
+                                off_t offset, unsigned char *buf, size_t len);
+
+// Deallocate an index built by deflate_index_build().
+void deflate_index_free(struct deflate_index *index);
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/gzclose.c b/c-blosc/internal-complibs/zlib-1.3.1/gzclose.c
new file mode 100644
index 0000000..48d6a86
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/gzclose.c
@@ -0,0 +1,23 @@
+/* gzclose.c -- zlib gzclose() function
+ * Copyright (C) 2004, 2010 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#include "gzguts.h"
+
+/* gzclose() is in a separate file so that it is linked in only if it is used.
+   That way the other gzclose functions can be used instead to avoid linking in
+   unneeded compression or decompression routines. */
+int ZEXPORT gzclose(gzFile file) {
+#ifndef NO_GZCOMPRESS
+    gz_statep state;
+
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
+#else
+    return gzclose_r(file);
+#endif
+}
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/gzguts.h b/c-blosc/internal-complibs/zlib-1.3.1/gzguts.h
new file mode 100644
index 0000000..a47f779
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/gzguts.h
@@ -0,0 +1,218 @@
+/* gzguts.h -- zlib internal header definitions for gz* operations
+ * Copyright (C) 2004-2024 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+#ifndef _WIN32
+  #include <unistd.h>
+#endif
+
+#ifdef _LARGEFILE64_SOURCE
+#  ifndef _LARGEFILE_SOURCE
+#    define _LARGEFILE_SOURCE 1
+#  endif
+#  undef _FILE_OFFSET_BITS
+#  undef _TIME_BITS
+#endif
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include <stdio.h>
+#include "zlib.h"
+#ifdef STDC
+#  include <string.h>
+#  include <stdlib.h>
+#  include <limits.h>
+#endif
+
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE
+#endif
+#include <fcntl.h>
+
+#ifdef _WIN32
+#  include <stddef.h>
+#endif
+
+#if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
+#  include <io.h>
+#endif
+
+#if defined(_WIN32)
+#  define WIDECHAR
+#endif
+
+#ifdef WINAPI_FAMILY
+#  define open _open
+#  define read _read
+#  define write _write
+#  define close _close
+#endif
+
+#ifdef NO_DEFLATE       /* for compatibility with old definition */
+#  define NO_GZCOMPRESS
+#endif
+
+#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(__CYGWIN__)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
+#  ifndef HAVE_VSNPRINTF
+#    define HAVE_VSNPRINTF
+#  endif
+#endif
+
+#ifndef HAVE_VSNPRINTF
+#  ifdef MSDOS
+/* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
+   but for now we just assume it doesn't. */
+#    define NO_vsnprintf
+#  endif
+#  ifdef __TURBOC__
+#    define NO_vsnprintf
+#  endif
+#  ifdef WIN32
+/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
+#    if !defined(vsnprintf) && !defined(NO_vsnprintf)
+#      if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
+#         define vsnprintf _vsnprintf
+#      endif
+#    endif
+#  endif
+#  ifdef __SASC
+#    define NO_vsnprintf
+#  endif
+#  ifdef VMS
+#    define NO_vsnprintf
+#  endif
+#  ifdef __OS400__
+#    define NO_vsnprintf
+#  endif
+#  ifdef __MVS__
+#    define NO_vsnprintf
+#  endif
+#endif
+
+/* unlike snprintf (which is required in C99), _snprintf does not guarantee
+   null termination of the result -- however this is only used in gzlib.c where
+   the result is assured to fit in the space provided */
+#if defined(_MSC_VER) && _MSC_VER < 1900
+#  define snprintf _snprintf
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+/* gz* functions always use library allocation functions */
+#ifndef STDC
+  extern voidp malloc(uInt size);
+  extern void free(voidpf ptr);
+#endif
+
+/* get errno and strerror definition */
+#if defined UNDER_CE
+#  include <windows.h>
+#  define zstrerror() gz_strwinerror((DWORD)GetLastError())
+#else
+#  ifndef NO_STRERROR
+#    include <errno.h>
+#    define zstrerror() strerror(errno)
+#  else
+#    define zstrerror() "stdio error (consult errno)"
+#  endif
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
+    ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *);
+    ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int);
+    ZEXTERN z_off64_t ZEXPORT gztell64(gzFile);
+    ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile);
+#endif
+
+/* default memLevel */
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+
+/* default i/o buffer size -- double this for output when reading (this and
+   twice this must be able to fit in an unsigned type) */
+#define GZBUFSIZE 8192
+
+/* gzip modes, also provide a little integrity check on the passed structure */
+#define GZ_NONE 0
+#define GZ_READ 7247
+#define GZ_WRITE 31153
+#define GZ_APPEND 1     /* mode set to GZ_WRITE after the file is opened */
+
+/* values for gz_state how */
+#define LOOK 0      /* look for a gzip header */
+#define COPY 1      /* copy input directly */
+#define GZIP 2      /* decompress a gzip stream */
+
+/* internal gzip file state data structure */
+typedef struct {
+        /* exposed contents for gzgetc() macro */
+    struct gzFile_s x;      /* "x" for exposed */
+                            /* x.have: number of bytes available at x.next */
+                            /* x.next: next output data to deliver or write */
+                            /* x.pos: current position in uncompressed data */
+        /* used for both reading and writing */
+    int mode;               /* see gzip modes above */
+    int fd;                 /* file descriptor */
+    char *path;             /* path or fd for error messages */
+    unsigned size;          /* buffer size, zero if not allocated yet */
+    unsigned want;          /* requested buffer size, default is GZBUFSIZE */
+    unsigned char *in;      /* input buffer (double-sized when writing) */
+    unsigned char *out;     /* output buffer (double-sized when reading) */
+    int direct;             /* 0 if processing gzip, 1 if transparent */
+        /* just for reading */
+    int how;                /* 0: get header, 1: copy, 2: decompress */
+    z_off64_t start;        /* where the gzip data started, for rewinding */
+    int eof;                /* true if end of input file reached */
+    int past;               /* true if read requested past end */
+        /* just for writing */
+    int level;              /* compression level */
+    int strategy;           /*
compression strategy */ + int reset; /* true if a reset is pending after a Z_FINISH */ + /* seek request */ + z_off64_t skip; /* amount to skip (already rewound if backwards) */ + int seek; /* true if seek request pending */ + /* error information */ + int err; /* error code */ + char *msg; /* error message */ + /* zlib inflate or deflate stream */ + z_stream strm; /* stream structure in-place (not a pointer) */ +} gz_state; +typedef gz_state FAR *gz_statep; + +/* shared functions */ +void ZLIB_INTERNAL gz_error(gz_statep, int, const char *); +#if defined UNDER_CE +char ZLIB_INTERNAL *gz_strwinerror(DWORD error); +#endif + +/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t + value -- needed when comparing unsigned to z_off64_t, which is signed + (possible z_off64_t types off_t, off64_t, and long are all signed) */ +unsigned ZLIB_INTERNAL gz_intmax(void); +#define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) diff --git a/c-blosc/internal-complibs/zlib-1.3.1/gzlib.c b/c-blosc/internal-complibs/zlib-1.3.1/gzlib.c new file mode 100644 index 0000000..983153c --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/gzlib.c @@ -0,0 +1,582 @@ +/* gzlib.c -- zlib functions common to reading and writing gzip files + * Copyright (C) 2004-2024 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +#if defined(_WIN32) && !defined(__BORLANDC__) +# define LSEEK _lseeki64 +#else +#if defined(_LARGEFILE64_SOURCE) && _LFS64_LARGEFILE-0 +# define LSEEK lseek64 +#else +# define LSEEK lseek +#endif +#endif + +#if defined UNDER_CE + +/* Map the Windows error number in ERROR to a locale-dependent error message + string and return a pointer to it. Typically, the values for ERROR come + from GetLastError. + + The string pointed to shall not be modified by the application, but may be + overwritten by a subsequent call to gz_strwinerror + + The gz_strwinerror function does not change the current setting of + GetLastError. */ +char ZLIB_INTERNAL *gz_strwinerror(DWORD error) { + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +#endif /* UNDER_CE */ + +/* Reset gzip file state */ +local void gz_reset(gz_statep state) { + state->x.have = 0; /* no output data available */ + if (state->mode == GZ_READ) { /* for reading ... */ + state->eof = 0; /* not at end of file */ + state->past = 0; /* have not read past end yet */ + state->how = LOOK; /* look for gzip header */ + } + else /* for writing ... */ + state->reset = 0; /* no deflateReset pending */ + state->seek = 0; /* no seek request pending */ + gz_error(state, Z_OK, NULL); /* clear error */ + state->x.pos = 0; /* no uncompressed data yet */ + state->strm.avail_in = 0; /* no input data yet */ +} + +/* Open a gzip file either by name or file descriptor. 
*/ +local gzFile gz_open(const void *path, int fd, const char *mode) { + gz_statep state; + z_size_t len; + int oflag; +#ifdef O_CLOEXEC + int cloexec = 0; +#endif +#ifdef O_EXCL + int exclusive = 0; +#endif + + /* check input */ + if (path == NULL) + return NULL; + + /* allocate gzFile structure to return */ + state = (gz_statep)malloc(sizeof(gz_state)); + if (state == NULL) + return NULL; + state->size = 0; /* no buffers allocated yet */ + state->want = GZBUFSIZE; /* requested buffer size */ + state->msg = NULL; /* no error message yet */ + + /* interpret mode */ + state->mode = GZ_NONE; + state->level = Z_DEFAULT_COMPRESSION; + state->strategy = Z_DEFAULT_STRATEGY; + state->direct = 0; + while (*mode) { + if (*mode >= '0' && *mode <= '9') + state->level = *mode - '0'; + else + switch (*mode) { + case 'r': + state->mode = GZ_READ; + break; +#ifndef NO_GZCOMPRESS + case 'w': + state->mode = GZ_WRITE; + break; + case 'a': + state->mode = GZ_APPEND; + break; +#endif + case '+': /* can't read and write at the same time */ + free(state); + return NULL; + case 'b': /* ignore -- will request binary anyway */ + break; +#ifdef O_CLOEXEC + case 'e': + cloexec = 1; + break; +#endif +#ifdef O_EXCL + case 'x': + exclusive = 1; + break; +#endif + case 'f': + state->strategy = Z_FILTERED; + break; + case 'h': + state->strategy = Z_HUFFMAN_ONLY; + break; + case 'R': + state->strategy = Z_RLE; + break; + case 'F': + state->strategy = Z_FIXED; + break; + case 'T': + state->direct = 1; + break; + default: /* could consider as an error, but just ignore */ + ; + } + mode++; + } + + /* must provide an "r", "w", or "a" */ + if (state->mode == GZ_NONE) { + free(state); + return NULL; + } + + /* can't force transparent read */ + if (state->mode == GZ_READ) { + if (state->direct) { + free(state); + return NULL; + } + state->direct = 1; /* for empty file */ + } + + /* save the path name for error messages */ +#ifdef WIDECHAR + if (fd == -2) { + len = wcstombs(NULL, path, 0); + if (len == (z_size_t)-1) + len = 0; + } + else +#endif + len = strlen((const char *)path); + state->path = (char *)malloc(len + 1); + if (state->path == NULL) { + free(state); + return NULL; + } +#ifdef WIDECHAR + if (fd == -2) + if (len) + wcstombs(state->path, path, len + 1); + else + *(state->path) = 0; + else +#endif +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->path, len + 1, "%s", (const char *)path); +#else + strcpy(state->path, path); +#endif + + /* compute the flags for open() */ + oflag = +#ifdef O_LARGEFILE + O_LARGEFILE | +#endif +#ifdef O_BINARY + O_BINARY | +#endif +#ifdef O_CLOEXEC + (cloexec ? O_CLOEXEC : 0) | +#endif + (state->mode == GZ_READ ? + O_RDONLY : + (O_WRONLY | O_CREAT | +#ifdef O_EXCL + (exclusive ? O_EXCL : 0) | +#endif + (state->mode == GZ_WRITE ? + O_TRUNC : + O_APPEND))); + + /* open the file with the appropriate flags (or just use fd) */ + state->fd = fd > -1 ? fd : ( +#ifdef WIDECHAR + fd == -2 ? 
_wopen(path, oflag, 0666) :
+#endif
+        open((const char *)path, oflag, 0666));
+    if (state->fd == -1) {
+        free(state->path);
+        free(state);
+        return NULL;
+    }
+    if (state->mode == GZ_APPEND) {
+        LSEEK(state->fd, 0, SEEK_END);  /* so gzoffset() is correct */
+        state->mode = GZ_WRITE;         /* simplify later checks */
+    }
+
+    /* save the current position for rewinding (only if reading) */
+    if (state->mode == GZ_READ) {
+        state->start = LSEEK(state->fd, 0, SEEK_CUR);
+        if (state->start == -1) state->start = 0;
+    }
+
+    /* initialize stream */
+    gz_reset(state);
+
+    /* return stream */
+    return (gzFile)state;
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen(const char *path, const char *mode) {
+    return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzopen64(const char *path, const char *mode) {
+    return gz_open(path, -1, mode);
+}
+
+/* -- see zlib.h -- */
+gzFile ZEXPORT gzdopen(int fd, const char *mode) {
+    char *path;         /* identifier for error messages */
+    gzFile gz;
+
+    if (fd == -1 || (path = (char *)malloc(7 + 3 * sizeof(int))) == NULL)
+        return NULL;
+#if !defined(NO_snprintf) && !defined(NO_vsnprintf)
+    (void)snprintf(path, 7 + 3 * sizeof(int), "<fd:%d>", fd);
+#else
+    sprintf(path, "<fd:%d>", fd);   /* for debugging */
+#endif
+    gz = gz_open(path, fd, mode);
+    free(path);
+    return gz;
+}
+
+/* -- see zlib.h -- */
+#ifdef WIDECHAR
+gzFile ZEXPORT gzopen_w(const wchar_t *path, const char *mode) {
+    return gz_open(path, -2, mode);
+}
+#endif
+
+/* -- see zlib.h -- */
+int ZEXPORT gzbuffer(gzFile file, unsigned size) {
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* make sure we haven't already allocated memory */
+    if (state->size != 0)
+        return -1;
+
+    /* check and set requested size */
+    if ((size << 1) < size)
+        return -1;              /* need to be able to double it */
+    if (size < 8)
+        size = 8;               /* needed to behave well with flushing */
+    state->want = size;
+    return 0;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzrewind(gzFile file) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're reading and that there's no error */
+    if (state->mode != GZ_READ ||
+            (state->err != Z_OK && state->err != Z_BUF_ERROR))
+        return -1;
+
+    /* back up and start over */
+    if (LSEEK(state->fd, state->start, SEEK_SET) == -1)
+        return -1;
+    gz_reset(state);
+    return 0;
+}
+
+/* -- see zlib.h -- */
+z_off64_t ZEXPORT gzseek64(gzFile file, z_off64_t offset, int whence) {
+    unsigned n;
+    z_off64_t ret;
+    gz_statep state;
+
+    /* get internal structure and check integrity */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
+        return -1;
+
+    /* check that there's no error */
+    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
+        return -1;
+
+    /* can only seek from start or relative to current position */
+    if (whence != SEEK_SET && whence != SEEK_CUR)
+        return -1;
+
+    /* normalize offset to a SEEK_CUR specification */
+    if (whence == SEEK_SET)
+        offset -= state->x.pos;
+    else if (state->seek)
+        offset += state->skip;
+    state->seek = 0;
+
+    /* if within raw area while reading, just go there */
+    if (state->mode == GZ_READ && state->how == COPY &&
+            state->x.pos + offset >= 0) {
+        ret = LSEEK(state->fd, offset - (z_off64_t)state->x.have, SEEK_CUR);
+        if (ret == -1)
+            return -1;
+        state->x.have = 0;
+        state->eof =
0; + state->past = 0; + state->seek = 0; + gz_error(state, Z_OK, NULL); + state->strm.avail_in = 0; + state->x.pos += offset; + return state->x.pos; + } + + /* calculate skip amount, rewinding if needed for back seek when reading */ + if (offset < 0) { + if (state->mode != GZ_READ) /* writing -- can't go backwards */ + return -1; + offset += state->x.pos; + if (offset < 0) /* before start of file! */ + return -1; + if (gzrewind(file) == -1) /* rewind, then skip to offset */ + return -1; + } + + /* if reading, skip what's in output buffer (one less gzgetc() check) */ + if (state->mode == GZ_READ) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > offset ? + (unsigned)offset : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + offset -= n; + } + + /* request skip (if not zero) */ + if (offset) { + state->seek = 1; + state->skip = offset; + } + return state->x.pos + offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzseek(gzFile file, z_off_t offset, int whence) { + z_off64_t ret; + + ret = gzseek64(file, (z_off64_t)offset, whence); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gztell64(gzFile file) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* return position */ + return state->x.pos + (state->seek ? state->skip : 0); +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gztell(gzFile file) { + z_off64_t ret; + + ret = gztell64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +z_off64_t ZEXPORT gzoffset64(gzFile file) { + z_off64_t offset; + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return -1; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return -1; + + /* compute and return effective offset in file */ + offset = LSEEK(state->fd, 0, SEEK_CUR); + if (offset == -1) + return -1; + if (state->mode == GZ_READ) /* reading */ + offset -= state->strm.avail_in; /* don't count buffered input */ + return offset; +} + +/* -- see zlib.h -- */ +z_off_t ZEXPORT gzoffset(gzFile file) { + z_off64_t ret; + + ret = gzoffset64(file); + return ret == (z_off_t)ret ? (z_off_t)ret : -1; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzeof(gzFile file) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return 0; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return 0; + + /* return end-of-file state */ + return state->mode == GZ_READ ? state->past : 0; +} + +/* -- see zlib.h -- */ +const char * ZEXPORT gzerror(gzFile file, int *errnum) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return NULL; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return NULL; + + /* return error information */ + if (errnum != NULL) + *errnum = state->err; + return state->err == Z_MEM_ERROR ? "out of memory" : + (state->msg == NULL ? 
"" : state->msg); +} + +/* -- see zlib.h -- */ +void ZEXPORT gzclearerr(gzFile file) { + gz_statep state; + + /* get internal structure and check integrity */ + if (file == NULL) + return; + state = (gz_statep)file; + if (state->mode != GZ_READ && state->mode != GZ_WRITE) + return; + + /* clear error and end-of-file */ + if (state->mode == GZ_READ) { + state->eof = 0; + state->past = 0; + } + gz_error(state, Z_OK, NULL); +} + +/* Create an error message in allocated memory and set state->err and + state->msg accordingly. Free any previous error message already there. Do + not try to free or allocate space if the error is Z_MEM_ERROR (out of + memory). Simply save the error message as a static string. If there is an + allocation failure constructing the error message, then convert the error to + out of memory. */ +void ZLIB_INTERNAL gz_error(gz_statep state, int err, const char *msg) { + /* free previously allocated message and clear */ + if (state->msg != NULL) { + if (state->err != Z_MEM_ERROR) + free(state->msg); + state->msg = NULL; + } + + /* if fatal, set state->x.have to 0 so that the gzgetc() macro fails */ + if (err != Z_OK && err != Z_BUF_ERROR) + state->x.have = 0; + + /* set error code, and if no message, then done */ + state->err = err; + if (msg == NULL) + return; + + /* for an out of memory error, return literal string when requested */ + if (err == Z_MEM_ERROR) + return; + + /* construct error message with path */ + if ((state->msg = (char *)malloc(strlen(state->path) + strlen(msg) + 3)) == + NULL) { + state->err = Z_MEM_ERROR; + return; + } +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + (void)snprintf(state->msg, strlen(state->path) + strlen(msg) + 3, + "%s%s%s", state->path, ": ", msg); +#else + strcpy(state->msg, state->path); + strcat(state->msg, ": "); + strcat(state->msg, msg); +#endif +} + +/* portably return maximum value for an int (when limits.h presumed not + available) -- we need to do this to cover cases where 2's complement not + used, since C standard permits 1's complement and sign-bit representations, + otherwise we could just use ((unsigned)-1) >> 1 */ +unsigned ZLIB_INTERNAL gz_intmax(void) { +#ifdef INT_MAX + return INT_MAX; +#else + unsigned p = 1, q; + do { + q = p; + p <<= 1; + p++; + } while (p > q); + return q >> 1; +#endif +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/gzread.c b/c-blosc/internal-complibs/zlib-1.3.1/gzread.c new file mode 100644 index 0000000..4168cbc --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/gzread.c @@ -0,0 +1,602 @@ +/* gzread.c -- zlib functions for reading gzip files + * Copyright (C) 2004-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Use read() to load a buffer -- return -1 on error, otherwise 0. Read from + state->fd, and update state->eof, state->err, and state->msg as appropriate. + This function needs to loop on read(), since read() is not guaranteed to + read the number of bytes requested, depending on the type of descriptor. 
*/ +local int gz_load(gz_statep state, unsigned char *buf, unsigned len, + unsigned *have) { + int ret; + unsigned get, max = ((unsigned)-1 >> 2) + 1; + + *have = 0; + do { + get = len - *have; + if (get > max) + get = max; + ret = read(state->fd, buf + *have, get); + if (ret <= 0) + break; + *have += (unsigned)ret; + } while (*have < len); + if (ret < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + if (ret == 0) + state->eof = 1; + return 0; +} + +/* Load up input buffer and set eof flag if last data loaded -- return -1 on + error, 0 otherwise. Note that the eof flag is set when the end of the input + file is reached, even though there may be unused data in the buffer. Once + that data has been used, no more attempts will be made to read the file. + If strm->avail_in != 0, then the current data is moved to the beginning of + the input buffer, and then the remainder of the buffer is loaded with the + available data from the input file. */ +local int gz_avail(gz_statep state) { + unsigned got; + z_streamp strm = &(state->strm); + + if (state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + if (state->eof == 0) { + if (strm->avail_in) { /* copy what's there to the start */ + unsigned char *p = state->in; + unsigned const char *q = strm->next_in; + unsigned n = strm->avail_in; + do { + *p++ = *q++; + } while (--n); + } + if (gz_load(state, state->in + strm->avail_in, + state->size - strm->avail_in, &got) == -1) + return -1; + strm->avail_in += got; + strm->next_in = state->in; + } + return 0; +} + +/* Look for gzip header, set up for inflate or copy. state->x.have must be 0. + If this is the first time in, allocate required memory. state->how will be + left unchanged if there is no more input data available, will be set to COPY + if there is no gzip header and direct copying will be performed, or it will + be set to GZIP for decompression. If direct copying, then leftover input + data from the input buffer will be copied to the output buffer. In that + case, all further file reads will be directly to either the output buffer or + a user buffer. If decompressing, the inflate state will be initialized. + gz_look() will return 0 on success or -1 on failure. 
*/ +local int gz_look(gz_statep state) { + z_streamp strm = &(state->strm); + + /* allocate read buffers and inflate memory */ + if (state->size == 0) { + /* allocate buffers */ + state->in = (unsigned char *)malloc(state->want); + state->out = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL || state->out == NULL) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + state->size = state->want; + + /* allocate inflate memory */ + state->strm.zalloc = Z_NULL; + state->strm.zfree = Z_NULL; + state->strm.opaque = Z_NULL; + state->strm.avail_in = 0; + state->strm.next_in = Z_NULL; + if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) { /* gunzip */ + free(state->out); + free(state->in); + state->size = 0; + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + } + + /* get at least the magic bytes in the input buffer */ + if (strm->avail_in < 2) { + if (gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) + return 0; + } + + /* look for gzip magic bytes -- if there, do gzip decoding (note: there is + a logical dilemma here when considering the case of a partially written + gzip file, to wit, if a single 31 byte is written, then we cannot tell + whether this is a single-byte file, or just a partially written gzip + file -- for here we assume that if a gzip file is being written, then + the header will be written in a single operation, so that reading a + single byte is sufficient indication that it is not a gzip file) */ + if (strm->avail_in > 1 && + strm->next_in[0] == 31 && strm->next_in[1] == 139) { + inflateReset(strm); + state->how = GZIP; + state->direct = 0; + return 0; + } + + /* no gzip header -- if we were decoding gzip before, then this is trailing + garbage. Ignore the trailing garbage and finish. */ + if (state->direct == 0) { + strm->avail_in = 0; + state->eof = 1; + state->x.have = 0; + return 0; + } + + /* doing raw i/o, copy any leftover input to output -- this assumes that + the output buffer is larger than the input buffer, which also assures + space for gzungetc() */ + state->x.next = state->out; + memcpy(state->x.next, strm->next_in, strm->avail_in); + state->x.have = strm->avail_in; + strm->avail_in = 0; + state->how = COPY; + state->direct = 1; + return 0; +} + +/* Decompress from input to the provided next_out and avail_out in the state. + On return, state->x.have and state->x.next point to the just decompressed + data. If the gzip stream completes, state->how is reset to LOOK to look for + the next gzip stream or raw data, once state->x.have is depleted. Returns 0 + on success, -1 on failure. */ +local int gz_decomp(gz_statep state) { + int ret = Z_OK; + unsigned had; + z_streamp strm = &(state->strm); + + /* fill output buffer up to end of deflate stream */ + had = strm->avail_out; + do { + /* get more input for inflate() */ + if (strm->avail_in == 0 && gz_avail(state) == -1) + return -1; + if (strm->avail_in == 0) { + gz_error(state, Z_BUF_ERROR, "unexpected end of file"); + break; + } + + /* decompress and handle errors */ + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) { + gz_error(state, Z_STREAM_ERROR, + "internal error: inflate stream corrupt"); + return -1; + } + if (ret == Z_MEM_ERROR) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + if (ret == Z_DATA_ERROR) { /* deflate stream invalid */ + gz_error(state, Z_DATA_ERROR, + strm->msg == NULL ? 
"compressed data error" : strm->msg); + return -1; + } + } while (strm->avail_out && ret != Z_STREAM_END); + + /* update available output */ + state->x.have = had - strm->avail_out; + state->x.next = strm->next_out - state->x.have; + + /* if the gzip stream completed successfully, look for another */ + if (ret == Z_STREAM_END) + state->how = LOOK; + + /* good decompression */ + return 0; +} + +/* Fetch data and put it in the output buffer. Assumes state->x.have is 0. + Data is either copied from the input file or decompressed from the input + file depending on state->how. If state->how is LOOK, then a gzip header is + looked for to determine whether to copy or decompress. Returns -1 on error, + otherwise 0. gz_fetch() will leave state->how as COPY or GZIP unless the + end of the input file has been reached and all data has been processed. */ +local int gz_fetch(gz_statep state) { + z_streamp strm = &(state->strm); + + do { + switch(state->how) { + case LOOK: /* -> LOOK, COPY (only if never GZIP), or GZIP */ + if (gz_look(state) == -1) + return -1; + if (state->how == LOOK) + return 0; + break; + case COPY: /* -> COPY */ + if (gz_load(state, state->out, state->size << 1, &(state->x.have)) + == -1) + return -1; + state->x.next = state->out; + return 0; + case GZIP: /* -> GZIP or LOOK (if end of gzip stream) */ + strm->avail_out = state->size << 1; + strm->next_out = state->out; + if (gz_decomp(state) == -1) + return -1; + } + } while (state->x.have == 0 && (!state->eof || strm->avail_in)); + return 0; +} + +/* Skip len uncompressed bytes of output. Return -1 on error, 0 on success. */ +local int gz_skip(gz_statep state, z_off64_t len) { + unsigned n; + + /* skip over len bytes or reach end-of-file, whichever comes first */ + while (len) + /* skip over whatever is in output buffer */ + if (state->x.have) { + n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ? + (unsigned)len : state->x.have; + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + len -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) + break; + + /* need more data to skip -- load up output buffer */ + else { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return -1; + } + return 0; +} + +/* Read len bytes into buf from file, or less than len up to the end of the + input. Return the number of bytes read. If zero is returned, either the + end of file was reached, or there was an error. state->err must be + consulted in that case to determine which. 
*/ +local z_size_t gz_read(gz_statep state, voidp buf, z_size_t len) { + z_size_t got; + unsigned n; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return 0; + } + + /* get len bytes to buf, or less than len if at the end */ + got = 0; + do { + /* set n to the maximum amount of len that fits in an unsigned int */ + n = (unsigned)-1; + if (n > len) + n = (unsigned)len; + + /* first just try copying data from the output buffer */ + if (state->x.have) { + if (state->x.have < n) + n = state->x.have; + memcpy(buf, state->x.next, n); + state->x.next += n; + state->x.have -= n; + } + + /* output buffer empty -- return if we're at the end of the input */ + else if (state->eof && state->strm.avail_in == 0) { + state->past = 1; /* tried to read past end */ + break; + } + + /* need output data -- for small len or new stream load up our output + buffer */ + else if (state->how == LOOK || n < (state->size << 1)) { + /* get more output, looking for header if required */ + if (gz_fetch(state) == -1) + return 0; + continue; /* no progress yet -- go back to copy above */ + /* the copy above assures that we will leave with space in the + output buffer, allowing at least one gzungetc() to succeed */ + } + + /* large len -- read directly into user buffer */ + else if (state->how == COPY) { /* read directly */ + if (gz_load(state, (unsigned char *)buf, n, &n) == -1) + return 0; + } + + /* large len -- decompress directly into user buffer */ + else { /* state->how == GZIP */ + state->strm.avail_out = n; + state->strm.next_out = (unsigned char *)buf; + if (gz_decomp(state) == -1) + return 0; + n = state->x.have; + state->x.have = 0; + } + + /* update progress */ + len -= n; + buf = (char *)buf + n; + got += n; + state->x.pos += n; + } while (len); + + /* return number of bytes read into user buffer */ + return got; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzread(gzFile file, voidp buf, unsigned len) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* since an int is returned, make sure len fits in one, otherwise return + with an error (this avoids a flaw in the interface) */ + if ((int)len < 0) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in an int"); + return -1; + } + + /* read len or fewer bytes to buf */ + len = (unsigned)gz_read(state, buf, len); + + /* check for an error */ + if (len == 0 && state->err != Z_OK && state->err != Z_BUF_ERROR) + return -1; + + /* return the number of bytes read (this is assured to fit in an int) */ + return (int)len; +} + +/* -- see zlib.h -- */ +z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, gzFile file) { + z_size_t len; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return 0; + + /* compute bytes to read -- error on overflow */ + len = nitems * size; + if (size && len / size != nitems) { + gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t"); + return 0; + } + + /* read len or fewer bytes to buf, return the number of full 
items read */ + return len ? gz_read(state, buf, len) / size : 0; +} + +/* -- see zlib.h -- */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +#else +# undef gzgetc +#endif +int ZEXPORT gzgetc(gzFile file) { + unsigned char buf[1]; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* try output buffer (no need to check for skip request) */ + if (state->x.have) { + state->x.have--; + state->x.pos++; + return *(state->x.next)++; + } + + /* nothing there -- try gz_read() */ + return gz_read(state, buf, 1) < 1 ? -1 : buf[0]; +} + +int ZEXPORT gzgetc_(gzFile file) { + return gzgetc(file); +} + +/* -- see zlib.h -- */ +int ZEXPORT gzungetc(int c, gzFile file) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return -1; + state = (gz_statep)file; + + /* in case this was just opened, set up the input buffer */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return -1; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return -1; + } + + /* can't push EOF */ + if (c < 0) + return -1; + + /* if output buffer empty, put byte at end (allows more pushing) */ + if (state->x.have == 0) { + state->x.have = 1; + state->x.next = state->out + (state->size << 1) - 1; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; + } + + /* if no room, give up (must have already done a gzungetc()) */ + if (state->x.have == (state->size << 1)) { + gz_error(state, Z_DATA_ERROR, "out of room to push characters"); + return -1; + } + + /* slide output data if needed and insert byte before existing data */ + if (state->x.next == state->out) { + unsigned char *src = state->out + state->x.have; + unsigned char *dest = state->out + (state->size << 1); + while (src > state->out) + *--dest = *--src; + state->x.next = dest; + } + state->x.have++; + state->x.next--; + state->x.next[0] = (unsigned char)c; + state->x.pos--; + state->past = 0; + return c; +} + +/* -- see zlib.h -- */ +char * ZEXPORT gzgets(gzFile file, char *buf, int len) { + unsigned left, n; + char *str; + unsigned char *eol; + gz_statep state; + + /* check parameters and get internal structure */ + if (file == NULL || buf == NULL || len < 1) + return NULL; + state = (gz_statep)file; + + /* check that we're reading and that there's no (serious) error */ + if (state->mode != GZ_READ || + (state->err != Z_OK && state->err != Z_BUF_ERROR)) + return NULL; + + /* process a skip request */ + if (state->seek) { + state->seek = 0; + if (gz_skip(state, state->skip) == -1) + return NULL; + } + + /* copy output bytes up to new line or len - 1, whichever comes first -- + append a terminating zero to the string (we don't check for a zero in + the contents, let the user worry about that) */ + str = buf; + left = (unsigned)len - 1; + if (left) do { + /* assure that something is in the output buffer */ + if (state->x.have == 0 && gz_fetch(state) == -1) + return NULL; /* error */ + if (state->x.have == 0) { /* end of file */ + state->past = 1; /* read past end */ + break; /* return what we have */ + } + + /* look for end-of-line in 
current output buffer */ + n = state->x.have > left ? left : state->x.have; + eol = (unsigned char *)memchr(state->x.next, '\n', n); + if (eol != NULL) + n = (unsigned)(eol - state->x.next) + 1; + + /* copy through end-of-line, or remainder if not found */ + memcpy(buf, state->x.next, n); + state->x.have -= n; + state->x.next += n; + state->x.pos += n; + left -= n; + buf += n; + } while (left && eol == NULL); + + /* return terminated string, or if nothing, end of file */ + if (buf == str) + return NULL; + buf[0] = 0; + return str; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzdirect(gzFile file) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* if the state is not known, but we can find out, then do so (this is + mainly for right after a gzopen() or gzdopen()) */ + if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0) + (void)gz_look(state); + + /* return 1 if transparent, 0 if processing a gzip stream */ + return state->direct; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_r(gzFile file) { + int ret, err; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're reading */ + if (state->mode != GZ_READ) + return Z_STREAM_ERROR; + + /* free memory and close file */ + if (state->size) { + inflateEnd(&(state->strm)); + free(state->out); + free(state->in); + } + err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK; + gz_error(state, Z_OK, NULL); + free(state->path); + ret = close(state->fd); + free(state); + return ret ? Z_ERRNO : err; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/gzwrite.c b/c-blosc/internal-complibs/zlib-1.3.1/gzwrite.c new file mode 100644 index 0000000..435b462 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/gzwrite.c @@ -0,0 +1,631 @@ +/* gzwrite.c -- zlib functions for writing gzip files + * Copyright (C) 2004-2019 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "gzguts.h" + +/* Initialize state for writing a gzip file. Mark initialization by setting + state->size to non-zero. Return -1 on a memory allocation failure, or 0 on + success. 
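+
+   As an informal sketch (not actual zlib code) of the invariants that
+   hold after gz_init() returns 0, given the allocations made below:
+
+       assert(state->size == state->want);
+       assert(state->in != NULL);          -- state->want << 1 bytes
+       if (!state->direct) {
+           assert(state->out != NULL);     -- state->want bytes
+           assert(state->strm.avail_out == state->size);
+           assert(state->strm.next_out == state->out);
+       }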
*/ +local int gz_init(gz_statep state) { + int ret; + z_streamp strm = &(state->strm); + + /* allocate input buffer (double size for gzprintf) */ + state->in = (unsigned char *)malloc(state->want << 1); + if (state->in == NULL) { + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* only need output buffer and deflate state if compressing */ + if (!state->direct) { + /* allocate output buffer */ + state->out = (unsigned char *)malloc(state->want); + if (state->out == NULL) { + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + + /* allocate deflate memory, set up for gzip compression */ + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; + strm->opaque = Z_NULL; + ret = deflateInit2(strm, state->level, Z_DEFLATED, + MAX_WBITS + 16, DEF_MEM_LEVEL, state->strategy); + if (ret != Z_OK) { + free(state->out); + free(state->in); + gz_error(state, Z_MEM_ERROR, "out of memory"); + return -1; + } + strm->next_in = NULL; + } + + /* mark state as initialized */ + state->size = state->want; + + /* initialize write buffer if compressing */ + if (!state->direct) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = strm->next_out; + } + return 0; +} + +/* Compress whatever is at avail_in and next_in and write to the output file. + Return -1 if there is an error writing to the output file or if gz_init() + fails to allocate memory, otherwise 0. flush is assumed to be a valid + deflate() flush value. If flush is Z_FINISH, then the deflate() state is + reset to start a new gzip stream. If gz->direct is true, then simply write + to the output file without compressing, and ignore flush. */ +local int gz_comp(gz_statep state, int flush) { + int ret, writ; + unsigned have, put, max = ((unsigned)-1 >> 2) + 1; + z_streamp strm = &(state->strm); + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return -1; + + /* write directly if requested */ + if (state->direct) { + while (strm->avail_in) { + put = strm->avail_in > max ? max : strm->avail_in; + writ = write(state->fd, strm->next_in, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + strm->avail_in -= (unsigned)writ; + strm->next_in += writ; + } + return 0; + } + + /* check for a pending reset */ + if (state->reset) { + /* don't start a new gzip member unless there is data to write */ + if (strm->avail_in == 0) + return 0; + deflateReset(strm); + state->reset = 0; + } + + /* run deflate() on provided input until it produces no more output */ + ret = Z_OK; + do { + /* write out current buffer contents if full, or if flushing, but if + doing Z_FINISH then don't write until we get to Z_STREAM_END */ + if (strm->avail_out == 0 || (flush != Z_NO_FLUSH && + (flush != Z_FINISH || ret == Z_STREAM_END))) { + while (strm->next_out > state->x.next) { + put = strm->next_out - state->x.next > (int)max ? 
max : + (unsigned)(strm->next_out - state->x.next); + writ = write(state->fd, state->x.next, put); + if (writ < 0) { + gz_error(state, Z_ERRNO, zstrerror()); + return -1; + } + state->x.next += writ; + } + if (strm->avail_out == 0) { + strm->avail_out = state->size; + strm->next_out = state->out; + state->x.next = state->out; + } + } + + /* compress */ + have = strm->avail_out; + ret = deflate(strm, flush); + if (ret == Z_STREAM_ERROR) { + gz_error(state, Z_STREAM_ERROR, + "internal error: deflate stream corrupt"); + return -1; + } + have -= strm->avail_out; + } while (have); + + /* if that completed a deflate stream, allow another to start */ + if (flush == Z_FINISH) + state->reset = 1; + + /* all done, no errors */ + return 0; +} + +/* Compress len zeros to output. Return -1 on a write error or memory + allocation failure by gz_comp(), or 0 on success. */ +local int gz_zero(gz_statep state, z_off64_t len) { + int first; + unsigned n; + z_streamp strm = &(state->strm); + + /* consume whatever's left in the input buffer */ + if (strm->avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + + /* compress len zeros (len guaranteed > 0) */ + first = 1; + while (len) { + n = GT_OFF(state->size) || (z_off64_t)state->size > len ? + (unsigned)len : state->size; + if (first) { + memset(state->in, 0, n); + first = 0; + } + strm->avail_in = n; + strm->next_in = state->in; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return -1; + len -= n; + } + return 0; +} + +/* Write len bytes from buf to file. Return the number of bytes written. If + the returned value is less than len, then there was an error. */ +local z_size_t gz_write(gz_statep state, voidpc buf, z_size_t len) { + z_size_t put = len; + + /* if len is zero, avoid unnecessary operations */ + if (len == 0) + return 0; + + /* allocate memory if this is the first time through */ + if (state->size == 0 && gz_init(state) == -1) + return 0; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return 0; + } + + /* for small len, copy to input buffer, otherwise compress directly */ + if (len < state->size) { + /* copy to input buffer, compress when full */ + do { + unsigned have, copy; + + if (state->strm.avail_in == 0) + state->strm.next_in = state->in; + have = (unsigned)((state->strm.next_in + state->strm.avail_in) - + state->in); + copy = state->size - have; + if (copy > len) + copy = (unsigned)len; + memcpy(state->in + have, buf, copy); + state->strm.avail_in += copy; + state->x.pos += copy; + buf = (const char *)buf + copy; + len -= copy; + if (len && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + } while (len); + } + else { + /* consume whatever's left in the input buffer */ + if (state->strm.avail_in && gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + + /* directly compress user buffer to file */ + state->strm.next_in = (z_const Bytef *)buf; + do { + unsigned n = (unsigned)-1; + if (n > len) + n = (unsigned)len; + state->strm.avail_in = n; + state->x.pos += n; + if (gz_comp(state, Z_NO_FLUSH) == -1) + return 0; + len -= n; + } while (len); + } + + /* input was all buffered or compressed */ + return put; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len) { + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return 0; + state = (gz_statep)file; + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK) + return 0; + + /* since an int is returned, 
make sure len fits in one, otherwise return
+   with an error (this avoids a flaw in the interface) */
+    if ((int)len < 0) {
+        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
+        return 0;
+    }
+
+    /* write len bytes from buf (the return value will fit in an int) */
+    return (int)gz_write(state, buf, len);
+}
+
+/* -- see zlib.h -- */
+z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, z_size_t nitems,
+                          gzFile file) {
+    z_size_t len;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return 0;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return 0;
+
+    /* compute bytes to write -- error on overflow */
+    len = nitems * size;
+    if (size && len / size != nitems) {
+        gz_error(state, Z_STREAM_ERROR, "request does not fit in a size_t");
+        return 0;
+    }
+
+    /* write len bytes from buf, return the number of full items written */
+    return len ? gz_write(state, buf, len) / size : 0;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzputc(gzFile file, int c) {
+    unsigned have;
+    unsigned char buf[1];
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return -1;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return -1;
+    }
+
+    /* try writing to input buffer for speed (state->size == 0 if buffer not
+       initialized) */
+    if (state->size) {
+        if (strm->avail_in == 0)
+            strm->next_in = state->in;
+        have = (unsigned)((strm->next_in + strm->avail_in) - state->in);
+        if (have < state->size) {
+            state->in[have] = (unsigned char)c;
+            strm->avail_in++;
+            state->x.pos++;
+            return c & 0xff;
+        }
+    }
+
+    /* no room in buffer or not initialized, use gz_write() */
+    buf[0] = (unsigned char)c;
+    if (gz_write(state, buf, 1) != 1)
+        return -1;
+    return c & 0xff;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzputs(gzFile file, const char *s) {
+    z_size_t len, put;
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return -1;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return -1;
+
+    /* write string */
+    len = strlen(s);
+    if ((int)len < 0 || (unsigned)len != len) {
+        gz_error(state, Z_STREAM_ERROR, "string length does not fit in int");
+        return -1;
+    }
+    put = gz_write(state, s, len);
+    return put < len ?
-1 : (int)len;
+}
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#include <stdarg.h>
+
+/* -- see zlib.h -- */
+int ZEXPORTVA gzvprintf(gzFile file, const char *format, va_list va) {
+    int len;
+    unsigned left;
+    char *next;
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* make sure we have some buffer space */
+    if (state->size == 0 && gz_init(state) == -1)
+        return state->err;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state->in;
+    next = (char *)(state->in + (strm->next_in - state->in) + strm->avail_in);
+    next[state->size - 1] = 0;
+#ifdef NO_vsnprintf
+# ifdef HAS_vsprintf_void
+    (void)vsprintf(next, format, va);
+    for (len = 0; len < state->size; len++)
+        if (next[len] == 0) break;
+# else
+    len = vsprintf(next, format, va);
+# endif
+#else
+# ifdef HAS_vsnprintf_void
+    (void)vsnprintf(next, state->size, format, va);
+    len = strlen(next);
+# else
+    len = vsnprintf(next, state->size, format, va);
+# endif
+#endif
+
+    /* check that printf() results fit in buffer */
+    if (len == 0 || (unsigned)len >= state->size || next[state->size - 1] != 0)
+        return 0;
+
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += (unsigned)len;
+    state->x.pos += len;
+    if (strm->avail_in >= state->size) {
+        left = strm->avail_in - state->size;
+        strm->avail_in = state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state->err;
+        memmove(state->in, state->in + state->size, left);
+        strm->next_in = state->in;
+        strm->avail_in = left;
+    }
+    return len;
+}
+
+int ZEXPORTVA gzprintf(gzFile file, const char *format, ...)
{
+    va_list va;
+    int ret;
+
+    va_start(va, format);
+    ret = gzvprintf(file, format, va);
+    va_end(va);
+    return ret;
+}
+
+#else /* !STDC && !Z_HAVE_STDARG_H */
+
+/* -- see zlib.h -- */
+int ZEXPORTVA gzprintf(gzFile file, const char *format, int a1, int a2, int a3,
+                       int a4, int a5, int a6, int a7, int a8, int a9, int a10,
+                       int a11, int a12, int a13, int a14, int a15, int a16,
+                       int a17, int a18, int a19, int a20) {
+    unsigned len, left;
+    char *next;
+    gz_statep state;
+    z_streamp strm;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+    strm = &(state->strm);
+
+    /* check that can really pass pointer in ints */
+    if (sizeof(int) != sizeof(void *))
+        return Z_STREAM_ERROR;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* make sure we have some buffer space */
+    if (state->size == 0 && gz_init(state) == -1)
+        return state->err;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* do the printf() into the input buffer, put length in len -- the input
+       buffer is double-sized just for this function, so there is guaranteed to
+       be state->size bytes available after the current contents */
+    if (strm->avail_in == 0)
+        strm->next_in = state->in;
+    next = (char *)(strm->next_in + strm->avail_in);
+    next[state->size - 1] = 0;
+#ifdef NO_snprintf
+# ifdef HAS_sprintf_void
+    sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12,
+            a13, a14, a15, a16, a17, a18, a19, a20);
+    for (len = 0; len < state->size; len++)
+        if (next[len] == 0)
+            break;
+# else
+    len = sprintf(next, format, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11,
+                  a12, a13, a14, a15, a16, a17, a18, a19, a20);
+# endif
+#else
+# ifdef HAS_snprintf_void
+    snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8, a9,
+             a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+    len = strlen(next);
+# else
+    len = snprintf(next, state->size, format, a1, a2, a3, a4, a5, a6, a7, a8,
+                   a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19, a20);
+# endif
+#endif
+
+    /* check that printf() results fit in buffer */
+    if (len == 0 || len >= state->size || next[state->size - 1] != 0)
+        return 0;
+
+    /* update buffer and position, compress first half if past that */
+    strm->avail_in += len;
+    state->x.pos += len;
+    if (strm->avail_in >= state->size) {
+        left = strm->avail_in - state->size;
+        strm->avail_in = state->size;
+        if (gz_comp(state, Z_NO_FLUSH) == -1)
+            return state->err;
+        memmove(state->in, state->in + state->size, left);
+        strm->next_in = state->in;
+        strm->avail_in = left;
+    }
+    return (int)len;
+}
+
+#endif
+
+/* -- see zlib.h -- */
+int ZEXPORT gzflush(gzFile file, int flush) {
+    gz_statep state;
+
+    /* get internal structure */
+    if (file == NULL)
+        return Z_STREAM_ERROR;
+    state = (gz_statep)file;
+
+    /* check that we're writing and that there's no error */
+    if (state->mode != GZ_WRITE || state->err != Z_OK)
+        return Z_STREAM_ERROR;
+
+    /* check flush parameter */
+    if (flush < 0 || flush > Z_FINISH)
+        return Z_STREAM_ERROR;
+
+    /* check for seek request */
+    if (state->seek) {
+        state->seek = 0;
+        if (gz_zero(state, state->skip) == -1)
+            return state->err;
+    }
+
+    /* compress remaining data with requested flush */
+    (void)gz_comp(state, flush);
+    return state->err;
+}
+
+/* -- see zlib.h -- */
+int ZEXPORT gzsetparams(gzFile file, int level, int strategy) {
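+    /* Illustrative note, not upstream zlib text: with a hypothetical
+       handle gz, a caller could relax compression for bulk data via
+
+           (void)gzsetparams(gz, 1, Z_DEFAULT_STRATEGY);
+
+       any input buffered under the old parameters is flushed with
+       Z_BLOCK below before deflateParams() applies the new settings. */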
gz_statep state; + z_streamp strm; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + strm = &(state->strm); + + /* check that we're writing and that there's no error */ + if (state->mode != GZ_WRITE || state->err != Z_OK || state->direct) + return Z_STREAM_ERROR; + + /* if no change is requested, then do nothing */ + if (level == state->level && strategy == state->strategy) + return Z_OK; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + return state->err; + } + + /* change compression parameters for subsequent input */ + if (state->size) { + /* flush previous input with previous parameters before changing */ + if (strm->avail_in && gz_comp(state, Z_BLOCK) == -1) + return state->err; + deflateParams(strm, level, strategy); + } + state->level = level; + state->strategy = strategy; + return Z_OK; +} + +/* -- see zlib.h -- */ +int ZEXPORT gzclose_w(gzFile file) { + int ret = Z_OK; + gz_statep state; + + /* get internal structure */ + if (file == NULL) + return Z_STREAM_ERROR; + state = (gz_statep)file; + + /* check that we're writing */ + if (state->mode != GZ_WRITE) + return Z_STREAM_ERROR; + + /* check for seek request */ + if (state->seek) { + state->seek = 0; + if (gz_zero(state, state->skip) == -1) + ret = state->err; + } + + /* flush, free memory, and close file */ + if (gz_comp(state, Z_FINISH) == -1) + ret = state->err; + if (state->size) { + if (!state->direct) { + (void)deflateEnd(&(state->strm)); + free(state->out); + } + free(state->in); + } + gz_error(state, Z_OK, NULL); + free(state->path); + if (close(state->fd) == -1) + ret = Z_ERRNO; + free(state); + return ret; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/infback.c b/c-blosc/internal-complibs/zlib-1.3.1/infback.c new file mode 100644 index 0000000..e7b25b3 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/infback.c @@ -0,0 +1,628 @@ +/* infback.c -- inflate using a call-back interface + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + This code is largely copied from inflate.c. Normally either infback.o or + inflate.o would be linked into an application--not both. The interface + with inffast.c is retained so that optimized assembler-coded versions of + inflate_fast() can be used with either inflate.c or infback.c. + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +/* + strm provides memory allocation functions in zalloc and zfree, or + Z_NULL to use the library memory allocation functions. + + windowBits is in the range 8..15, and window is a user-supplied + window and output buffer that is 2**windowBits bytes. 
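+
+   A minimal setup sketch (illustrative; handle_error() is hypothetical),
+   where the caller owns the 2**15 == 32768 byte window:
+
+       z_stream strm;
+       unsigned char window[32768];
+
+       strm.zalloc = Z_NULL;
+       strm.zfree = Z_NULL;
+       strm.opaque = Z_NULL;
+       if (inflateBackInit(&strm, 15, window) != Z_OK)
+           handle_error();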
+ */ +int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits, + unsigned char FAR *window, const char *version, + int stream_size) { + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL || window == Z_NULL || + windowBits < 8 || windowBits > 15) + return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *)ZALLOC(strm, 1, + sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->dmax = 32768U; + state->wbits = (uInt)windowBits; + state->wsize = 1U << windowBits; + state->window = window; + state->wnext = 0; + state->whave = 0; + state->sane = 1; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(struct inflate_state FAR *state) { +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +/* Macros for inflateBack(): */ + +/* Load returned state from inflate_fast() */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Set state from registers for inflate_fast() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Assure that some input is available. If input is requested, but denied, + then return a Z_BUF_ERROR from inflateBack(). 
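+
+   For example (an illustrative callback, not part of this file), an in()
+   function reading from a FILE would deny input by returning zero at end
+   of file or on a read error, which is what triggers this path:
+
+       local unsigned read_in(void *desc, z_const unsigned char **buf)
+       {
+           static unsigned char hold[16384];
+           *buf = hold;
+           return (unsigned)fread(hold, 1, sizeof(hold), (FILE *)desc);
+       }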
 */
+#define PULL() \
+    do { \
+        if (have == 0) { \
+            have = in(in_desc, &next); \
+            if (have == 0) { \
+                next = Z_NULL; \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/* Get a byte of input into the bit accumulator, or return from inflateBack()
+   with an error if there is no input available. */
+#define PULLBYTE() \
+    do { \
+        PULL(); \
+        have--; \
+        hold += (unsigned long)(*next++) << bits; \
+        bits += 8; \
+    } while (0)
+
+/* Assure that there are at least n bits in the bit accumulator.  If there is
+   not enough available input to do that, then return from inflateBack() with
+   an error. */
+#define NEEDBITS(n) \
+    do { \
+        while (bits < (unsigned)(n)) \
+            PULLBYTE(); \
+    } while (0)
+
+/* Return the low n bits of the bit accumulator (n < 16) */
+#define BITS(n) \
+    ((unsigned)hold & ((1U << (n)) - 1))
+
+/* Remove n bits from the bit accumulator */
+#define DROPBITS(n) \
+    do { \
+        hold >>= (n); \
+        bits -= (unsigned)(n); \
+    } while (0)
+
+/* Remove zero to seven bits as needed to go to a byte boundary */
+#define BYTEBITS() \
+    do { \
+        hold >>= bits & 7; \
+        bits -= bits & 7; \
+    } while (0)
+
+/* Assure that some output space is available, by writing out the window
+   if it's full.  If the write fails, return from inflateBack() with a
+   Z_BUF_ERROR. */
+#define ROOM() \
+    do { \
+        if (left == 0) { \
+            put = state->window; \
+            left = state->wsize; \
+            state->whave = left; \
+            if (out(out_desc, put, left)) { \
+                ret = Z_BUF_ERROR; \
+                goto inf_leave; \
+            } \
+        } \
+    } while (0)
+
+/*
+   strm provides the memory allocation functions and window buffer on input,
+   and provides information on the unused input on return.  For Z_DATA_ERROR
+   returns, strm will also provide an error message.
+
+   in() and out() are the call-back input and output functions.  When
+   inflateBack() needs more input, it calls in().  When inflateBack() has
+   filled the window with output, or when it completes with data in the
+   window, it calls out() to write out the data.  The application must not
+   change the provided input until in() is called again or inflateBack()
+   returns.  The application must not change the window/output buffer until
+   inflateBack() returns.
+
+   in() and out() are called with a descriptor parameter provided in the
+   inflateBack() call.  This parameter can be a structure that provides the
+   information required to do the read or write, as well as accumulated
+   information on the input and output such as totals and check values.
+
+   in() should return zero on failure.  out() should return non-zero on
+   failure.  If either in() or out() fails, then inflateBack() returns a
+   Z_BUF_ERROR.  strm->next_in can be checked for Z_NULL to see whether it
+   was in() or out() that caused the error.  Otherwise, inflateBack()
+   returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format
+   error, or Z_MEM_ERROR if it could not allocate memory for the state.
+   inflateBack() can also return Z_STREAM_ERROR if the input parameters
+   are not correct, i.e. strm is Z_NULL or the state was not initialized.
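+
+   Putting the pieces together (a hedged sketch patterned after zlib's
+   own examples: read_in() is the illustrative input callback above, the
+   stream is assumed set up with inflateBackInit(), and stdin is assumed
+   to carry a raw deflate stream):
+
+       local int write_out(void *desc, unsigned char *buf, unsigned len)
+       {
+           return fwrite(buf, 1, len, (FILE *)desc) != len;
+       }
+
+       ret = inflateBack(&strm, read_in, stdin, write_out, stdout);
+       if (ret != Z_STREAM_END)
+           handle_error();              -- hypothetical error handler
+       (void)inflateBackEnd(&strm);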
+ */ +int ZEXPORT inflateBack(z_streamp strm, in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc) { + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + /* Check that the strm exists and that the state was initialized */ + if (strm == Z_NULL || strm->state == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* Reset the state */ + strm->msg = Z_NULL; + state->mode = TYPE; + state->last = 0; + state->whave = 0; + next = strm->next_in; + have = next != Z_NULL ? strm->avail_in : 0; + hold = 0; + bits = 0; + put = state->window; + left = state->wsize; + + /* Inflate until end of block marked as last */ + for (;;) + switch (state->mode) { + case TYPE: + /* determine and dispatch block type */ + if (state->last) { + BYTEBITS(); + state->mode = DONE; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN; /* decode codes */ + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + + case STORED: + /* get and verify stored block length */ + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + + /* copy stored block from input to output */ + while (state->length != 0) { + copy = state->length; + PULL(); + ROOM(); + if (copy > have) copy = have; + if (copy > left) copy = left; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + + case TABLE: + /* get dynamic table entries descriptor */ + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + + /* get code length code lengths (not a typo) */ + state->have = 0; + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + + /* get length and distance code code lengths */ + state->have = 0; + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = (unsigned)(state->lens[state->have - 1]); + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + state->next = 
state->codes; + state->lencode = (code const FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (code const FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN; + /* fallthrough */ + + case LEN: + /* use inflate_fast() if we have enough input and output */ + if (have >= 6 && left >= 258) { + RESTORE(); + if (state->whave < state->wsize) + state->whave = state->wsize - left; + inflate_fast(strm, state->wsize); + LOAD(); + break; + } + + /* get a literal, length, or end-of-block code */ + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + state->length = (unsigned)here.val; + + /* process literal */ + if (here.op == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + ROOM(); + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + } + + /* process end of block */ + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->mode = TYPE; + break; + } + + /* invalid code */ + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + + /* length code -- get extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + + /* get distance code */ + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + } + DROPBITS(here.bits); + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + + /* get distance extra bits, if any */ + state->extra = (unsigned)(here.op) & 15; + if (state->extra != 0) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + } + if (state->offset > state->wsize - (state->whave < state->wsize ? 
+ left : 0)) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + + /* copy match from window to output */ + do { + ROOM(); + copy = state->wsize - state->offset; + if (copy < left) { + from = put + copy; + copy = left - copy; + } + else { + from = put - state->offset; + copy = left; + } + if (copy > state->length) copy = state->length; + state->length -= copy; + left -= copy; + do { + *put++ = *from++; + } while (--copy); + } while (state->length != 0); + break; + + case DONE: + /* inflate stream terminated properly */ + ret = Z_STREAM_END; + goto inf_leave; + + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + + default: + /* can't happen, but makes compilers happy */ + ret = Z_STREAM_ERROR; + goto inf_leave; + } + + /* Write leftover output and return unused input */ + inf_leave: + if (left < state->wsize) { + if (out(out_desc, state->window, state->wsize - left) && + ret == Z_STREAM_END) + ret = Z_BUF_ERROR; + } + strm->next_in = next; + strm->avail_in = have; + return ret; +} + +int ZEXPORT inflateBackEnd(z_streamp strm) { + if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) + return Z_STREAM_ERROR; + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/inffast.c b/c-blosc/internal-complibs/zlib-1.3.1/inffast.c new file mode 100644 index 0000000..9354676 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/inffast.c @@ -0,0 +1,320 @@ +/* inffast.c -- fast decoding + * Copyright (C) 1995-2017 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef ASMINF +# pragma message("Assembler code may have bugs -- use at your own risk") +#else + +/* + Decode literal, length, and distance codes and write out the resulting + literal and match bytes until either not enough input or output is + available, an end-of-block is encountered, or a data error is encountered. + When large enough input and output buffers are supplied to inflate(), for + example, a 16K input buffer and a 64K output buffer, more than 95% of the + inflate execution time is spent in this routine. + + Entry assumptions: + + state->mode == LEN + strm->avail_in >= 6 + strm->avail_out >= 258 + start >= strm->avail_out + state->bits < 8 + + On return, state->mode is one of: + + LEN -- ran out of enough output space or enough available input + TYPE -- reached end of block code, inflate() to interpret next block + BAD -- error in block data + + Notes: + + - The maximum input bits used by a length/distance pair is 15 bits for the + length code, 5 bits for the length extra, 15 bits for the distance code, + and 13 bits for the distance extra. This totals 48 bits, or six bytes. + Therefore if strm->avail_in >= 6, then there is enough input to avoid + checking for available input while decoding. + + - The maximum bytes that a single length/distance pair can output is 258 + bytes, which is the maximum length that can be coded. inflate_fast() + requires strm->avail_out >= 258 for each loop to avoid checking for + output space. 
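+
+   Accordingly, the callers in inflate.c and infback.c enter this routine
+   only under essentially this guard (compare the LEN case in infback.c
+   above, which passes state->wsize as start):
+
+       if (have >= 6 && left >= 258) {
+           RESTORE();
+           inflate_fast(strm, start);
+           LOAD();
+       }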
+ */ +void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start) { + struct inflate_state FAR *state; + z_const unsigned char FAR *in; /* local strm->next_in */ + z_const unsigned char FAR *last; /* have enough input while in < last */ + unsigned char FAR *out; /* local strm->next_out */ + unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ + unsigned char FAR *end; /* while out < end, enough space available */ +#ifdef INFLATE_STRICT + unsigned dmax; /* maximum distance from zlib header */ +#endif + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ + unsigned long hold; /* local strm->hold */ + unsigned bits; /* local strm->bits */ + code const FAR *lcode; /* local strm->lencode */ + code const FAR *dcode; /* local strm->distcode */ + unsigned lmask; /* mask for first level of length codes */ + unsigned dmask; /* mask for first level of distance codes */ + code const *here; /* retrieved table entry */ + unsigned op; /* code bits, operation, extra bits, or */ + /* window position, window bytes to copy */ + unsigned len; /* match length, unused bytes */ + unsigned dist; /* match distance */ + unsigned char FAR *from; /* where to copy match from */ + + /* copy state to local variables */ + state = (struct inflate_state FAR *)strm->state; + in = strm->next_in; + last = in + (strm->avail_in - 5); + out = strm->next_out; + beg = out - (start - strm->avail_out); + end = out + (strm->avail_out - 257); +#ifdef INFLATE_STRICT + dmax = state->dmax; +#endif + wsize = state->wsize; + whave = state->whave; + wnext = state->wnext; + window = state->window; + hold = state->hold; + bits = state->bits; + lcode = state->lencode; + dcode = state->distcode; + lmask = (1U << state->lenbits) - 1; + dmask = (1U << state->distbits) - 1; + + /* decode literals and length/distances until end-of-block or not enough + input data or output space */ + do { + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = lcode + (hold & lmask); + dolen: + op = (unsigned)(here->bits); + hold >>= op; + bits -= op; + op = (unsigned)(here->op); + if (op == 0) { /* literal */ + Tracevv((stderr, here->val >= 0x20 && here->val < 0x7f ? 
+ "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here->val)); + *out++ = (unsigned char)(here->val); + } + else if (op & 16) { /* length base */ + len = (unsigned)(here->val); + op &= 15; /* number of extra bits */ + if (op) { + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + len += (unsigned)hold & ((1U << op) - 1); + hold >>= op; + bits -= op; + } + Tracevv((stderr, "inflate: length %u\n", len)); + if (bits < 15) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + here = dcode + (hold & dmask); + dodist: + op = (unsigned)(here->bits); + hold >>= op; + bits -= op; + op = (unsigned)(here->op); + if (op & 16) { /* distance base */ + dist = (unsigned)(here->val); + op &= 15; /* number of extra bits */ + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + if (bits < op) { + hold += (unsigned long)(*in++) << bits; + bits += 8; + } + } + dist += (unsigned)hold & ((1U << op) - 1); +#ifdef INFLATE_STRICT + if (dist > dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + hold >>= op; + bits -= op; + Tracevv((stderr, "inflate: distance %u\n", dist)); + op = (unsigned)(out - beg); /* max distance in output */ + if (dist > op) { /* see if copy from window */ + op = dist - op; /* distance back in window */ + if (op > whave) { + if (state->sane) { + strm->msg = + (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + if (len <= op - whave) { + do { + *out++ = 0; + } while (--len); + continue; + } + len -= op - whave; + do { + *out++ = 0; + } while (--op > whave); + if (op == 0) { + from = out - dist; + do { + *out++ = *from++; + } while (--len); + continue; + } +#endif + } + from = window; + if (wnext == 0) { /* very common case */ + from += wsize - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + else if (wnext < op) { /* wrap around window */ + from += wsize + wnext - op; + op -= wnext; + if (op < len) { /* some from end of window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = window; + if (wnext < len) { /* some from start of window */ + op = wnext; + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + } + else { /* contiguous in window */ + from += wnext - op; + if (op < len) { /* some from window */ + len -= op; + do { + *out++ = *from++; + } while (--op); + from = out - dist; /* rest from output */ + } + } + while (len > 2) { + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + else { + from = out - dist; /* copy direct from output */ + do { /* minimum length is three */ + *out++ = *from++; + *out++ = *from++; + *out++ = *from++; + len -= 3; + } while (len > 2); + if (len) { + *out++ = *from++; + if (len > 1) + *out++ = *from++; + } + } + } + else if ((op & 64) == 0) { /* 2nd level distance code */ + here = dcode + here->val + (hold & ((1U << op) - 1)); + goto dodist; + } + else { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + } + else if ((op & 64) == 0) { /* 2nd level length code */ + here = lcode + here->val + (hold & ((1U << op) - 1)); + goto dolen; + } + else if (op & 32) { /* end-of-block */ + Tracevv((stderr, "inflate: 
end of block\n")); + state->mode = TYPE; + break; + } + else { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + } while (in < last && out < end); + + /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ + len = bits >> 3; + in -= len; + bits -= len << 3; + hold &= (1U << bits) - 1; + + /* update state and return */ + strm->next_in = in; + strm->next_out = out; + strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); + strm->avail_out = (unsigned)(out < end ? + 257 + (end - out) : 257 - (out - end)); + state->hold = hold; + state->bits = bits; + return; +} + +/* + inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): + - Using bit fields for code structure + - Different op definition to avoid & for extra bits (do & for table bits) + - Three separate decoding do-loops for direct, window, and wnext == 0 + - Special case for distance > 1 copies to do overlapped load and store copy + - Explicit branch predictions (based on measured branch probabilities) + - Deferring match copy and interspersed it with decoding subsequent codes + - Swapping literal/length else + - Swapping window/direct else + - Larger unrolled copy loops (three is about right) + - Moving len -= 3 statement into middle of loop + */ + +#endif /* !ASMINF */ diff --git a/c-blosc/internal-complibs/zlib-1.3.1/inffast.h b/c-blosc/internal-complibs/zlib-1.3.1/inffast.h new file mode 100644 index 0000000..49c6d15 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/inffast.h @@ -0,0 +1,11 @@ +/* inffast.h -- header to use inffast.c + * Copyright (C) 1995-2003, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +void ZLIB_INTERNAL inflate_fast(z_streamp strm, unsigned start); diff --git a/c-blosc/internal-complibs/zlib-1.3.1/inffixed.h b/c-blosc/internal-complibs/zlib-1.3.1/inffixed.h new file mode 100644 index 0000000..d628327 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/inffixed.h @@ -0,0 +1,94 @@ + /* inffixed.h -- table for decoding fixed codes + * Generated automatically by makefixed(). + */ + + /* WARNING: this file should *not* be used by applications. + It is part of the implementation of this library and is + subject to change. Applications should only use zlib.h. 
+ */ + + static const code lenfix[512] = { + {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, + {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, + {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, + {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, + {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, + {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, + {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, + {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, + {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, + {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, + {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, + {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, + {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, + {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, + {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, + {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, + {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, + {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, + {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, + {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, + {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, + {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, + {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, + {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, + {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, + {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, + {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, + {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, + {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, + {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, + {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, + {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, + {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, + {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, + {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, + {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, + {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, + {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, + {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, + {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, + {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, + {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, + {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, + {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, + {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, + {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, + {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, + {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, + {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, + {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, + 
{0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, + {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, + {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, + {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, + {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, + {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, + {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, + {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, + {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, + {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, + {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, + {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, + {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, + {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, + {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, + {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, + {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, + {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, + {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, + {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, + {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, + {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, + {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, + {0,9,255} + }; + + static const code distfix[32] = { + {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, + {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, + {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, + {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, + {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, + {22,5,193},{64,5,0} + }; diff --git a/c-blosc/internal-complibs/zlib-1.3.1/inflate.c b/c-blosc/internal-complibs/zlib-1.3.1/inflate.c new file mode 100644 index 0000000..94ecff0 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/inflate.c @@ -0,0 +1,1526 @@ +/* inflate.c -- zlib decompression + * Copyright (C) 1995-2022 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * Change history: + * + * 1.2.beta0 24 Nov 2002 + * - First version -- complete rewrite of inflate to simplify code, avoid + * creation of window when not needed, minimize use of window when it is + * needed, make inffast.c even faster, implement gzip decoding, and to + * improve code readability and style over the previous zlib inflate code + * + * 1.2.beta1 25 Nov 2002 + * - Use pointers for available input and output checking in inffast.c + * - Remove input and output counters in inffast.c + * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 + * - Remove unnecessary second byte pull from length extra in inffast.c + * - Unroll direct copy to three copies per loop in inffast.c + * + * 1.2.beta2 4 Dec 2002 + * - Change external routine names to reduce potential conflicts + * - Correct filename to inffixed.h for fixed tables in inflate.c + * - Make hbuf[] unsigned char to match parameter type in inflate.c + * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) + * to avoid negation problem on Alphas (64 bit) in inflate.c + * + * 1.2.beta3 22 Dec 2002 + * - Add comments on state->bits 
assertion in inffast.c + * - Add comments on op field in inftrees.h + * - Fix bug in reuse of allocated window after inflateReset() + * - Remove bit fields--back to byte structure for speed + * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths + * - Change post-increments to pre-increments in inflate_fast(), PPC biased? + * - Add compile time option, POSTINC, to use post-increments instead (Intel?) + * - Make MATCH copy in inflate() much faster for when inflate_fast() not used + * - Use local copies of stream next and avail values, as well as local bit + * buffer and bit count in inflate()--for speed when inflate_fast() not used + * + * 1.2.beta4 1 Jan 2003 + * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings + * - Move a comment on output buffer sizes from inffast.c to inflate.c + * - Add comments in inffast.c to introduce the inflate_fast() routine + * - Rearrange window copies in inflate_fast() for speed and simplification + * - Unroll last copy for window match in inflate_fast() + * - Use local copies of window variables in inflate_fast() for speed + * - Pull out common wnext == 0 case for speed in inflate_fast() + * - Make op and len in inflate_fast() unsigned for consistency + * - Add FAR to lcode and dcode declarations in inflate_fast() + * - Simplified bad distance check in inflate_fast() + * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new + * source file infback.c to provide a call-back interface to inflate for + * programs like gzip and unzip -- uses window as output buffer to avoid + * window copying + * + * 1.2.beta5 1 Jan 2003 + * - Improved inflateBack() interface to allow the caller to provide initial + * input in strm. + * - Fixed stored blocks bug in inflateBack() + * + * 1.2.beta6 4 Jan 2003 + * - Added comments in inffast.c on effectiveness of POSTINC + * - Typecasting all around to reduce compiler warnings + * - Changed loops from while (1) or do {} while (1) to for (;;), again to + * make compilers happy + * - Changed type of window in inflateBackInit() to unsigned char * + * + * 1.2.beta7 27 Jan 2003 + * - Changed many types to unsigned or unsigned short to avoid warnings + * - Added inflateCopy() function + * + * 1.2.0 9 Mar 2003 + * - Changed inflateBack() interface to provide separate opaque descriptors + * for the in() and out() functions + * - Changed inflateBack() argument and in_func typedef to swap the length + * and buffer address return values for the input function + * - Check next_in and next_out for Z_NULL on entry to inflate() + * + * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. 
+ */ + +#include "zutil.h" +#include "inftrees.h" +#include "inflate.h" +#include "inffast.h" + +#ifdef MAKEFIXED +# ifndef BUILDFIXED +# define BUILDFIXED +# endif +#endif + +local int inflateStateCheck(z_streamp strm) { + struct inflate_state FAR *state; + if (strm == Z_NULL || + strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) + return 1; + state = (struct inflate_state FAR *)strm->state; + if (state == Z_NULL || state->strm != strm || + state->mode < HEAD || state->mode > SYNC) + return 1; + return 0; +} + +int ZEXPORT inflateResetKeep(z_streamp strm) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + strm->total_in = strm->total_out = state->total = 0; + strm->msg = Z_NULL; + if (state->wrap) /* to support ill-conceived Java test suite */ + strm->adler = state->wrap & 1; + state->mode = HEAD; + state->last = 0; + state->havedict = 0; + state->flags = -1; + state->dmax = 32768U; + state->head = Z_NULL; + state->hold = 0; + state->bits = 0; + state->lencode = state->distcode = state->next = state->codes; + state->sane = 1; + state->back = -1; + Tracev((stderr, "inflate: reset\n")); + return Z_OK; +} + +int ZEXPORT inflateReset(z_streamp strm) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + state->wsize = 0; + state->whave = 0; + state->wnext = 0; + return inflateResetKeep(strm); +} + +int ZEXPORT inflateReset2(z_streamp strm, int windowBits) { + int wrap; + struct inflate_state FAR *state; + + /* get the state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* extract wrap request from windowBits parameter */ + if (windowBits < 0) { + if (windowBits < -15) + return Z_STREAM_ERROR; + wrap = 0; + windowBits = -windowBits; + } + else { + wrap = (windowBits >> 4) + 5; +#ifdef GUNZIP + if (windowBits < 48) + windowBits &= 15; +#endif + } + + /* set number of window bits, free window if different */ + if (windowBits && (windowBits < 8 || windowBits > 15)) + return Z_STREAM_ERROR; + if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { + ZFREE(strm, state->window); + state->window = Z_NULL; + } + + /* update state and reset the rest of it */ + state->wrap = wrap; + state->wbits = (unsigned)windowBits; + return inflateReset(strm); +} + +int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, + const char *version, int stream_size) { + int ret; + struct inflate_state FAR *state; + + if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || + stream_size != (int)(sizeof(z_stream))) + return Z_VERSION_ERROR; + if (strm == Z_NULL) return Z_STREAM_ERROR; + strm->msg = Z_NULL; /* in case we return an error */ + if (strm->zalloc == (alloc_func)0) { +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zalloc = zcalloc; + strm->opaque = (voidpf)0; +#endif + } + if (strm->zfree == (free_func)0) +#ifdef Z_SOLO + return Z_STREAM_ERROR; +#else + strm->zfree = zcfree; +#endif + state = (struct inflate_state FAR *) + ZALLOC(strm, 1, sizeof(struct inflate_state)); + if (state == Z_NULL) return Z_MEM_ERROR; + Tracev((stderr, "inflate: allocated\n")); + strm->state = (struct internal_state FAR *)state; + state->strm = strm; + state->window = Z_NULL; + state->mode = HEAD; /* to pass state test in inflateReset2() */ + ret = inflateReset2(strm, windowBits); + if (ret != Z_OK) { + ZFREE(strm, state); + strm->state = Z_NULL; + } + 
return ret; +} + +int ZEXPORT inflateInit_(z_streamp strm, const char *version, + int stream_size) { + return inflateInit2_(strm, DEF_WBITS, version, stream_size); +} + +int ZEXPORT inflatePrime(z_streamp strm, int bits, int value) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + if (bits == 0) + return Z_OK; + state = (struct inflate_state FAR *)strm->state; + if (bits < 0) { + state->hold = 0; + state->bits = 0; + return Z_OK; + } + if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; + value &= (1L << bits) - 1; + state->hold += (unsigned)value << state->bits; + state->bits += (uInt)bits; + return Z_OK; +} + +/* + Return state with length and distance decoding tables and index sizes set to + fixed code decoding. Normally this returns fixed tables from inffixed.h. + If BUILDFIXED is defined, then instead this routine builds the tables the + first time it's called, and returns those tables the first time and + thereafter. This reduces the size of the code by about 2K bytes, in + exchange for a little execution time. However, BUILDFIXED should not be + used for threaded applications, since the rewriting of the tables and virgin + may not be thread-safe. + */ +local void fixedtables(struct inflate_state FAR *state) { +#ifdef BUILDFIXED + static int virgin = 1; + static code *lenfix, *distfix; + static code fixed[544]; + + /* build fixed huffman tables if first call (may not be thread safe) */ + if (virgin) { + unsigned sym, bits; + static code *next; + + /* literal/length table */ + sym = 0; + while (sym < 144) state->lens[sym++] = 8; + while (sym < 256) state->lens[sym++] = 9; + while (sym < 280) state->lens[sym++] = 7; + while (sym < 288) state->lens[sym++] = 8; + next = fixed; + lenfix = next; + bits = 9; + inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); + + /* distance table */ + sym = 0; + while (sym < 32) state->lens[sym++] = 5; + distfix = next; + bits = 5; + inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); + + /* do this just once */ + virgin = 0; + } +#else /* !BUILDFIXED */ +# include "inffixed.h" +#endif /* BUILDFIXED */ + state->lencode = lenfix; + state->lenbits = 9; + state->distcode = distfix; + state->distbits = 5; +} + +#ifdef MAKEFIXED +#include <stdio.h> + +/* + Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also + defines BUILDFIXED, so the tables are built on the fly. makefixed() writes + those tables to stdout, which would be piped to inffixed.h. A small program + can simply call makefixed to do this: + + void makefixed(void); + + int main(void) + { + makefixed(); + return 0; + } + + Then that can be linked with zlib built with MAKEFIXED defined and run: + + a.out > inffixed.h + */ +void makefixed(void) +{ + unsigned low, size; + struct inflate_state state; + + fixedtables(&state); + puts(" /* inffixed.h -- table for decoding fixed codes"); + puts(" * Generated automatically by makefixed()."); + puts(" */"); + puts(""); + puts(" /* WARNING: this file should *not* be used by applications."); + puts(" It is part of the implementation of this library and is"); + puts(" subject to change. Applications should only use zlib.h."); + puts(" */"); + puts(""); + size = 1U << 9; + printf(" static const code lenfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 7) == 0) printf("\n "); + printf("{%u,%u,%d}", (low & 127) == 99 ? 
64 : state.lencode[low].op, + state.lencode[low].bits, state.lencode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); + size = 1U << 5; + printf("\n static const code distfix[%u] = {", size); + low = 0; + for (;;) { + if ((low % 6) == 0) printf("\n "); + printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits, + state.distcode[low].val); + if (++low == size) break; + putchar(','); + } + puts("\n };"); +} +#endif /* MAKEFIXED */ + +/* + Update the window with the last wsize (normally 32K) bytes written before + returning. If window does not exist yet, create it. This is only called + when a window is already in use, or when output has been written during this + inflate call, but the end of the deflate stream has not been reached yet. + It is also called to create a window for dictionary data when a dictionary + is loaded. + + Providing output buffers larger than 32K to inflate() should provide a speed + advantage, since only the last 32K of output is copied to the sliding window + upon return from inflate(), and since all distances after the first 32K of + output will fall in the output data, making match copies simpler and faster. + The advantage may be dependent on the size of the processor's data caches. + */ +local int updatewindow(z_streamp strm, const Bytef *end, unsigned copy) { + struct inflate_state FAR *state; + unsigned dist; + + state = (struct inflate_state FAR *)strm->state; + + /* if it hasn't been done already, allocate space for the window */ + if (state->window == Z_NULL) { + state->window = (unsigned char FAR *) + ZALLOC(strm, 1U << state->wbits, + sizeof(unsigned char)); + if (state->window == Z_NULL) return 1; + } + + /* if window not in use yet, initialize */ + if (state->wsize == 0) { + state->wsize = 1U << state->wbits; + state->wnext = 0; + state->whave = 0; + } + + /* copy state->wsize or less output bytes into the circular window */ + if (copy >= state->wsize) { + zmemcpy(state->window, end - state->wsize, state->wsize); + state->wnext = 0; + state->whave = state->wsize; + } + else { + dist = state->wsize - state->wnext; + if (dist > copy) dist = copy; + zmemcpy(state->window + state->wnext, end - copy, dist); + copy -= dist; + if (copy) { + zmemcpy(state->window, end - copy, copy); + state->wnext = copy; + state->whave = state->wsize; + } + else { + state->wnext += dist; + if (state->wnext == state->wsize) state->wnext = 0; + if (state->whave < state->wsize) state->whave += dist; + } + } + return 0; +} + +/* Macros for inflate(): */ + +/* check function to use adler32() for zlib or crc32() for gzip */ +#ifdef GUNZIP +# define UPDATE_CHECK(check, buf, len) \ + (state->flags ? 
crc32(check, buf, len) : adler32(check, buf, len)) +#else +# define UPDATE_CHECK(check, buf, len) adler32(check, buf, len) +#endif + +/* check macros for header crc */ +#ifdef GUNZIP +# define CRC2(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + check = crc32(check, hbuf, 2); \ + } while (0) + +# define CRC4(check, word) \ + do { \ + hbuf[0] = (unsigned char)(word); \ + hbuf[1] = (unsigned char)((word) >> 8); \ + hbuf[2] = (unsigned char)((word) >> 16); \ + hbuf[3] = (unsigned char)((word) >> 24); \ + check = crc32(check, hbuf, 4); \ + } while (0) +#endif + +/* Load registers with state in inflate() for speed */ +#define LOAD() \ + do { \ + put = strm->next_out; \ + left = strm->avail_out; \ + next = strm->next_in; \ + have = strm->avail_in; \ + hold = state->hold; \ + bits = state->bits; \ + } while (0) + +/* Restore state from registers in inflate() */ +#define RESTORE() \ + do { \ + strm->next_out = put; \ + strm->avail_out = left; \ + strm->next_in = next; \ + strm->avail_in = have; \ + state->hold = hold; \ + state->bits = bits; \ + } while (0) + +/* Clear the input bit accumulator */ +#define INITBITS() \ + do { \ + hold = 0; \ + bits = 0; \ + } while (0) + +/* Get a byte of input into the bit accumulator, or return from inflate() + if there is no input available. */ +#define PULLBYTE() \ + do { \ + if (have == 0) goto inf_leave; \ + have--; \ + hold += (unsigned long)(*next++) << bits; \ + bits += 8; \ + } while (0) + +/* Assure that there are at least n bits in the bit accumulator. If there is + not enough available input to do that, then return from inflate(). */ +#define NEEDBITS(n) \ + do { \ + while (bits < (unsigned)(n)) \ + PULLBYTE(); \ + } while (0) + +/* Return the low n bits of the bit accumulator (n < 16) */ +#define BITS(n) \ + ((unsigned)hold & ((1U << (n)) - 1)) + +/* Remove n bits from the bit accumulator */ +#define DROPBITS(n) \ + do { \ + hold >>= (n); \ + bits -= (unsigned)(n); \ + } while (0) + +/* Remove zero to seven bits as needed to go to a byte boundary */ +#define BYTEBITS() \ + do { \ + hold >>= bits & 7; \ + bits -= bits & 7; \ + } while (0) + +/* + inflate() uses a state machine to process as much input data and generate as + much output data as possible before returning. The state machine is + structured roughly as follows: + + for (;;) switch (state) { + ... + case STATEn: + if (not enough input data or output space to make progress) + return; + ... make progress ... + state = STATEm; + break; + ... + } + + so when inflate() is called again, the same case is attempted again, and + if the appropriate resources are provided, the machine proceeds to the + next state. The NEEDBITS() macro is usually the way the state evaluates + whether it can proceed or should return. NEEDBITS() does the return if + the requested bits are not available. The typical use of the BITS macros + is: + + NEEDBITS(n); + ... do something with BITS(n) ... + DROPBITS(n); + + where NEEDBITS(n) either returns from inflate() if there isn't enough + input left to load n bits into the accumulator, or it continues. BITS(n) + gives the low n bits in the accumulator. When done, DROPBITS(n) drops + the low n bits off the accumulator. INITBITS() clears the accumulator + and sets the number of available bits to zero. BYTEBITS() discards just + enough bits to put the accumulator on a byte boundary. After BYTEBITS() + and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. 
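+
+   A small worked illustration (an editorial addition, not upstream
+   text): if the bytes 0x5a and then 0xc3 have been pulled, then
+   hold == 0xc35a and bits == 16, since PULLBYTE() shifts each new byte
+   in above the bits already held.  BITS(3) then returns 0x5a & 7 == 2,
+   and after DROPBITS(3), hold == 0x186b and bits == 13.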
+ + NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return + if there is no input available. The decoding of variable length codes uses + PULLBYTE() directly in order to pull just enough bytes to decode the next + code, and no more. + + Some states loop until they get enough input, making sure that enough + state information is maintained to continue the loop where it left off + if NEEDBITS() returns in the loop. For example, want, need, and keep + would all have to actually be part of the saved state in case NEEDBITS() + returns: + + case STATEw: + while (want < need) { + NEEDBITS(n); + keep[want++] = BITS(n); + DROPBITS(n); + } + state = STATEx; + case STATEx: + + As shown above, if the next state is also the next case, then the break + is omitted. + + A state may also return if there is not enough output space available to + complete that state. Those states are copying stored data, writing a + literal byte, and copying a matching string. + + When returning, a "goto inf_leave" is used to update the total counters, + update the check value, and determine whether any progress has been made + during that inflate() call in order to return the proper return code. + Progress is defined as a change in either strm->avail_in or strm->avail_out. + When there is a window, goto inf_leave will update the window with the last + output written. If a goto inf_leave occurs in the middle of decompression + and there is no window currently, goto inf_leave will create one and copy + output to the window for the next call of inflate(). + + In this implementation, the flush parameter of inflate() only affects the + return code (per zlib.h). inflate() always writes as much as possible to + strm->next_out, given the space available and the provided input--the effect + documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers + the allocation of and copying into a sliding window until necessary, which + provides the effect documented in zlib.h for Z_FINISH when the entire input + stream available. So the only thing the flush parameter actually does is: + when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it + will return Z_BUF_ERROR if it has not reached the end of the stream. 
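+
+   As a hedged illustration of how a caller drives this machine (the
+   buffer names here are invented for the sketch and are not part of
+   zlib):
+
+     strm.next_in = source;  strm.avail_in = source_len;
+     do {
+         strm.next_out = dest;  strm.avail_out = dest_size;
+         ret = inflate(&strm, Z_NO_FLUSH);
+         ... consume the dest_size - strm.avail_out bytes produced ...
+     } while (ret == Z_OK);
+
+   Each call resumes in whatever state the previous call returned from;
+   Z_STREAM_END reports a completed deflate stream, and Z_BUF_ERROR here
+   would mean the input ran out before that.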
+ */ + +int ZEXPORT inflate(z_streamp strm, int flush) { + struct inflate_state FAR *state; + z_const unsigned char FAR *next; /* next input */ + unsigned char FAR *put; /* next output */ + unsigned have, left; /* available input and output */ + unsigned long hold; /* bit buffer */ + unsigned bits; /* bits in bit buffer */ + unsigned in, out; /* save starting available input and output */ + unsigned copy; /* number of stored or match bytes to copy */ + unsigned char FAR *from; /* where to copy match bytes from */ + code here; /* current decoding table entry */ + code last; /* parent table entry */ + unsigned len; /* length to copy for repeats, bits to drop */ + int ret; /* return code */ +#ifdef GUNZIP + unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ +#endif + static const unsigned short order[19] = /* permutation of code lengths */ + {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + + if (inflateStateCheck(strm) || strm->next_out == Z_NULL || + (strm->next_in == Z_NULL && strm->avail_in != 0)) + return Z_STREAM_ERROR; + + state = (struct inflate_state FAR *)strm->state; + if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ + LOAD(); + in = have; + out = left; + ret = Z_OK; + for (;;) + switch (state->mode) { + case HEAD: + if (state->wrap == 0) { + state->mode = TYPEDO; + break; + } + NEEDBITS(16); +#ifdef GUNZIP + if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ + if (state->wbits == 0) + state->wbits = 15; + state->check = crc32(0L, Z_NULL, 0); + CRC2(state->check, hold); + INITBITS(); + state->mode = FLAGS; + break; + } + if (state->head != Z_NULL) + state->head->done = -1; + if (!(state->wrap & 1) || /* check if zlib header allowed */ +#else + if ( +#endif + ((BITS(8) << 8) + (hold >> 8)) % 31) { + strm->msg = (char *)"incorrect header check"; + state->mode = BAD; + break; + } + if (BITS(4) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + DROPBITS(4); + len = BITS(4) + 8; + if (state->wbits == 0) + state->wbits = len; + if (len > 15 || len > state->wbits) { + strm->msg = (char *)"invalid window size"; + state->mode = BAD; + break; + } + state->dmax = 1U << len; + state->flags = 0; /* indicate zlib header */ + Tracev((stderr, "inflate: zlib header ok\n")); + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = hold & 0x200 ? 
DICTID : TYPE; + INITBITS(); + break; +#ifdef GUNZIP + case FLAGS: + NEEDBITS(16); + state->flags = (int)(hold); + if ((state->flags & 0xff) != Z_DEFLATED) { + strm->msg = (char *)"unknown compression method"; + state->mode = BAD; + break; + } + if (state->flags & 0xe000) { + strm->msg = (char *)"unknown header flags set"; + state->mode = BAD; + break; + } + if (state->head != Z_NULL) + state->head->text = (int)((hold >> 8) & 1); + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = TIME; + /* fallthrough */ + case TIME: + NEEDBITS(32); + if (state->head != Z_NULL) + state->head->time = hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC4(state->check, hold); + INITBITS(); + state->mode = OS; + /* fallthrough */ + case OS: + NEEDBITS(16); + if (state->head != Z_NULL) { + state->head->xflags = (int)(hold & 0xff); + state->head->os = (int)(hold >> 8); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + state->mode = EXLEN; + /* fallthrough */ + case EXLEN: + if (state->flags & 0x0400) { + NEEDBITS(16); + state->length = (unsigned)(hold); + if (state->head != Z_NULL) + state->head->extra_len = (unsigned)hold; + if ((state->flags & 0x0200) && (state->wrap & 4)) + CRC2(state->check, hold); + INITBITS(); + } + else if (state->head != Z_NULL) + state->head->extra = Z_NULL; + state->mode = EXTRA; + /* fallthrough */ + case EXTRA: + if (state->flags & 0x0400) { + copy = state->length; + if (copy > have) copy = have; + if (copy) { + if (state->head != Z_NULL && + state->head->extra != Z_NULL && + (len = state->head->extra_len - state->length) < + state->head->extra_max) { + zmemcpy(state->head->extra + len, next, + len + copy > state->head->extra_max ? 
+ state->head->extra_max - len : copy); + } + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + state->length -= copy; + } + if (state->length) goto inf_leave; + } + state->length = 0; + state->mode = NAME; + /* fallthrough */ + case NAME: + if (state->flags & 0x0800) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->name != Z_NULL && + state->length < state->head->name_max) + state->head->name[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->name = Z_NULL; + state->length = 0; + state->mode = COMMENT; + /* fallthrough */ + case COMMENT: + if (state->flags & 0x1000) { + if (have == 0) goto inf_leave; + copy = 0; + do { + len = (unsigned)(next[copy++]); + if (state->head != Z_NULL && + state->head->comment != Z_NULL && + state->length < state->head->comm_max) + state->head->comment[state->length++] = (Bytef)len; + } while (len && copy < have); + if ((state->flags & 0x0200) && (state->wrap & 4)) + state->check = crc32(state->check, next, copy); + have -= copy; + next += copy; + if (len) goto inf_leave; + } + else if (state->head != Z_NULL) + state->head->comment = Z_NULL; + state->mode = HCRC; + /* fallthrough */ + case HCRC: + if (state->flags & 0x0200) { + NEEDBITS(16); + if ((state->wrap & 4) && hold != (state->check & 0xffff)) { + strm->msg = (char *)"header crc mismatch"; + state->mode = BAD; + break; + } + INITBITS(); + } + if (state->head != Z_NULL) { + state->head->hcrc = (int)((state->flags >> 9) & 1); + state->head->done = 1; + } + strm->adler = state->check = crc32(0L, Z_NULL, 0); + state->mode = TYPE; + break; +#endif + case DICTID: + NEEDBITS(32); + strm->adler = state->check = ZSWAP32(hold); + INITBITS(); + state->mode = DICT; + /* fallthrough */ + case DICT: + if (state->havedict == 0) { + RESTORE(); + return Z_NEED_DICT; + } + strm->adler = state->check = adler32(0L, Z_NULL, 0); + state->mode = TYPE; + /* fallthrough */ + case TYPE: + if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case TYPEDO: + if (state->last) { + BYTEBITS(); + state->mode = CHECK; + break; + } + NEEDBITS(3); + state->last = BITS(1); + DROPBITS(1); + switch (BITS(2)) { + case 0: /* stored block */ + Tracev((stderr, "inflate: stored block%s\n", + state->last ? " (last)" : "")); + state->mode = STORED; + break; + case 1: /* fixed block */ + fixedtables(state); + Tracev((stderr, "inflate: fixed codes block%s\n", + state->last ? " (last)" : "")); + state->mode = LEN_; /* decode codes */ + if (flush == Z_TREES) { + DROPBITS(2); + goto inf_leave; + } + break; + case 2: /* dynamic block */ + Tracev((stderr, "inflate: dynamic codes block%s\n", + state->last ? 
" (last)" : "")); + state->mode = TABLE; + break; + case 3: + strm->msg = (char *)"invalid block type"; + state->mode = BAD; + } + DROPBITS(2); + break; + case STORED: + BYTEBITS(); /* go to byte boundary */ + NEEDBITS(32); + if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { + strm->msg = (char *)"invalid stored block lengths"; + state->mode = BAD; + break; + } + state->length = (unsigned)hold & 0xffff; + Tracev((stderr, "inflate: stored length %u\n", + state->length)); + INITBITS(); + state->mode = COPY_; + if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case COPY_: + state->mode = COPY; + /* fallthrough */ + case COPY: + copy = state->length; + if (copy) { + if (copy > have) copy = have; + if (copy > left) copy = left; + if (copy == 0) goto inf_leave; + zmemcpy(put, next, copy); + have -= copy; + next += copy; + left -= copy; + put += copy; + state->length -= copy; + break; + } + Tracev((stderr, "inflate: stored end\n")); + state->mode = TYPE; + break; + case TABLE: + NEEDBITS(14); + state->nlen = BITS(5) + 257; + DROPBITS(5); + state->ndist = BITS(5) + 1; + DROPBITS(5); + state->ncode = BITS(4) + 4; + DROPBITS(4); +#ifndef PKZIP_BUG_WORKAROUND + if (state->nlen > 286 || state->ndist > 30) { + strm->msg = (char *)"too many length or distance symbols"; + state->mode = BAD; + break; + } +#endif + Tracev((stderr, "inflate: table sizes ok\n")); + state->have = 0; + state->mode = LENLENS; + /* fallthrough */ + case LENLENS: + while (state->have < state->ncode) { + NEEDBITS(3); + state->lens[order[state->have++]] = (unsigned short)BITS(3); + DROPBITS(3); + } + while (state->have < 19) + state->lens[order[state->have++]] = 0; + state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 7; + ret = inflate_table(CODES, state->lens, 19, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid code lengths set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: code lengths ok\n")); + state->have = 0; + state->mode = CODELENS; + /* fallthrough */ + case CODELENS: + while (state->have < state->nlen + state->ndist) { + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.val < 16) { + DROPBITS(here.bits); + state->lens[state->have++] = here.val; + } + else { + if (here.val == 16) { + NEEDBITS(here.bits + 2); + DROPBITS(here.bits); + if (state->have == 0) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + len = state->lens[state->have - 1]; + copy = 3 + BITS(2); + DROPBITS(2); + } + else if (here.val == 17) { + NEEDBITS(here.bits + 3); + DROPBITS(here.bits); + len = 0; + copy = 3 + BITS(3); + DROPBITS(3); + } + else { + NEEDBITS(here.bits + 7); + DROPBITS(here.bits); + len = 0; + copy = 11 + BITS(7); + DROPBITS(7); + } + if (state->have + copy > state->nlen + state->ndist) { + strm->msg = (char *)"invalid bit length repeat"; + state->mode = BAD; + break; + } + while (copy--) + state->lens[state->have++] = (unsigned short)len; + } + } + + /* handle error breaks in while */ + if (state->mode == BAD) break; + + /* check for end-of-block code (better have one) */ + if (state->lens[256] == 0) { + strm->msg = (char *)"invalid code -- missing end-of-block"; + state->mode = BAD; + break; + } + + /* build code tables -- note: do not change the lenbits or distbits + values here (9 and 6) without reading the comments in inftrees.h + concerning the ENOUGH constants, which depend on those values */ + 
state->next = state->codes; + state->lencode = (const code FAR *)(state->next); + state->lenbits = 9; + ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), + &(state->lenbits), state->work); + if (ret) { + strm->msg = (char *)"invalid literal/lengths set"; + state->mode = BAD; + break; + } + state->distcode = (const code FAR *)(state->next); + state->distbits = 6; + ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, + &(state->next), &(state->distbits), state->work); + if (ret) { + strm->msg = (char *)"invalid distances set"; + state->mode = BAD; + break; + } + Tracev((stderr, "inflate: codes ok\n")); + state->mode = LEN_; + if (flush == Z_TREES) goto inf_leave; + /* fallthrough */ + case LEN_: + state->mode = LEN; + /* fallthrough */ + case LEN: + if (have >= 6 && left >= 258) { + RESTORE(); + inflate_fast(strm, out); + LOAD(); + if (state->mode == TYPE) + state->back = -1; + break; + } + state->back = 0; + for (;;) { + here = state->lencode[BITS(state->lenbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if (here.op && (here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->lencode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + state->length = (unsigned)here.val; + if ((int)(here.op) == 0) { + Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? + "inflate: literal '%c'\n" : + "inflate: literal 0x%02x\n", here.val)); + state->mode = LIT; + break; + } + if (here.op & 32) { + Tracevv((stderr, "inflate: end of block\n")); + state->back = -1; + state->mode = TYPE; + break; + } + if (here.op & 64) { + strm->msg = (char *)"invalid literal/length code"; + state->mode = BAD; + break; + } + state->extra = (unsigned)(here.op) & 15; + state->mode = LENEXT; + /* fallthrough */ + case LENEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->length += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } + Tracevv((stderr, "inflate: length %u\n", state->length)); + state->was = state->length; + state->mode = DIST; + /* fallthrough */ + case DIST: + for (;;) { + here = state->distcode[BITS(state->distbits)]; + if ((unsigned)(here.bits) <= bits) break; + PULLBYTE(); + } + if ((here.op & 0xf0) == 0) { + last = here; + for (;;) { + here = state->distcode[last.val + + (BITS(last.bits + last.op) >> last.bits)]; + if ((unsigned)(last.bits + here.bits) <= bits) break; + PULLBYTE(); + } + DROPBITS(last.bits); + state->back += last.bits; + } + DROPBITS(here.bits); + state->back += here.bits; + if (here.op & 64) { + strm->msg = (char *)"invalid distance code"; + state->mode = BAD; + break; + } + state->offset = (unsigned)here.val; + state->extra = (unsigned)(here.op) & 15; + state->mode = DISTEXT; + /* fallthrough */ + case DISTEXT: + if (state->extra) { + NEEDBITS(state->extra); + state->offset += BITS(state->extra); + DROPBITS(state->extra); + state->back += state->extra; + } +#ifdef INFLATE_STRICT + if (state->offset > state->dmax) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#endif + Tracevv((stderr, "inflate: distance %u\n", state->offset)); + state->mode = MATCH; + /* fallthrough */ + case MATCH: + if (left == 0) goto inf_leave; + copy = out - left; + if (state->offset > copy) { /* copy from window */ + copy = state->offset - copy; + if (copy > state->whave) { + if 
(state->sane) { + strm->msg = (char *)"invalid distance too far back"; + state->mode = BAD; + break; + } +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + Trace((stderr, "inflate.c too far\n")); + copy -= state->whave; + if (copy > state->length) copy = state->length; + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = 0; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; +#endif + } + if (copy > state->wnext) { + copy -= state->wnext; + from = state->window + (state->wsize - copy); + } + else + from = state->window + (state->wnext - copy); + if (copy > state->length) copy = state->length; + } + else { /* copy from output */ + from = put - state->offset; + copy = state->length; + } + if (copy > left) copy = left; + left -= copy; + state->length -= copy; + do { + *put++ = *from++; + } while (--copy); + if (state->length == 0) state->mode = LEN; + break; + case LIT: + if (left == 0) goto inf_leave; + *put++ = (unsigned char)(state->length); + left--; + state->mode = LEN; + break; + case CHECK: + if (state->wrap) { + NEEDBITS(32); + out -= left; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE_CHECK(state->check, put - out, out); + out = left; + if ((state->wrap & 4) && ( +#ifdef GUNZIP + state->flags ? hold : +#endif + ZSWAP32(hold)) != state->check) { + strm->msg = (char *)"incorrect data check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: check matches trailer\n")); + } +#ifdef GUNZIP + state->mode = LENGTH; + /* fallthrough */ + case LENGTH: + if (state->wrap && state->flags) { + NEEDBITS(32); + if ((state->wrap & 4) && hold != (state->total & 0xffffffff)) { + strm->msg = (char *)"incorrect length check"; + state->mode = BAD; + break; + } + INITBITS(); + Tracev((stderr, "inflate: length matches trailer\n")); + } +#endif + state->mode = DONE; + /* fallthrough */ + case DONE: + ret = Z_STREAM_END; + goto inf_leave; + case BAD: + ret = Z_DATA_ERROR; + goto inf_leave; + case MEM: + return Z_MEM_ERROR; + case SYNC: + /* fallthrough */ + default: + return Z_STREAM_ERROR; + } + + /* + Return from inflate(), updating the total counts and the check value. + If there was no progress during the inflate() call, return a buffer + error. Call updatewindow() to create and/or update the window state. + Note: a memory error from inflate() is non-recoverable. + */ + inf_leave: + RESTORE(); + if (state->wsize || (out != strm->avail_out && state->mode < BAD && + (state->mode < CHECK || flush != Z_FINISH))) + if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { + state->mode = MEM; + return Z_MEM_ERROR; + } + in -= strm->avail_in; + out -= strm->avail_out; + strm->total_in += in; + strm->total_out += out; + state->total += out; + if ((state->wrap & 4) && out) + strm->adler = state->check = + UPDATE_CHECK(state->check, strm->next_out - out, out); + strm->data_type = (int)state->bits + (state->last ? 64 : 0) + + (state->mode == TYPE ? 128 : 0) + + (state->mode == LEN_ || state->mode == COPY_ ? 
256 : 0); + if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) + ret = Z_BUF_ERROR; + return ret; +} + +int ZEXPORT inflateEnd(z_streamp strm) { + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->window != Z_NULL) ZFREE(strm, state->window); + ZFREE(strm, strm->state); + strm->state = Z_NULL; + Tracev((stderr, "inflate: end\n")); + return Z_OK; +} + +int ZEXPORT inflateGetDictionary(z_streamp strm, Bytef *dictionary, + uInt *dictLength) { + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + + /* copy dictionary */ + if (state->whave && dictionary != Z_NULL) { + zmemcpy(dictionary, state->window + state->wnext, + state->whave - state->wnext); + zmemcpy(dictionary + state->whave - state->wnext, + state->window, state->wnext); + } + if (dictLength != Z_NULL) + *dictLength = state->whave; + return Z_OK; +} + +int ZEXPORT inflateSetDictionary(z_streamp strm, const Bytef *dictionary, + uInt dictLength) { + struct inflate_state FAR *state; + unsigned long dictid; + int ret; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (state->wrap != 0 && state->mode != DICT) + return Z_STREAM_ERROR; + + /* check for correct dictionary identifier */ + if (state->mode == DICT) { + dictid = adler32(0L, Z_NULL, 0); + dictid = adler32(dictid, dictionary, dictLength); + if (dictid != state->check) + return Z_DATA_ERROR; + } + + /* copy dictionary to window using updatewindow(), which will amend the + existing dictionary if appropriate */ + ret = updatewindow(strm, dictionary + dictLength, dictLength); + if (ret) { + state->mode = MEM; + return Z_MEM_ERROR; + } + state->havedict = 1; + Tracev((stderr, "inflate: dictionary set\n")); + return Z_OK; +} + +int ZEXPORT inflateGetHeader(z_streamp strm, gz_headerp head) { + struct inflate_state FAR *state; + + /* check state */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; + + /* save header structure */ + state->head = head; + head->done = 0; + return Z_OK; +} + +/* + Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found + or when out of input. When called, *have is the number of pattern bytes + found in order so far, in 0..3. On return *have is updated to the new + state. If on return *have equals four, then the pattern was found and the + return value is how many bytes were read including the last byte of the + pattern. If *have is less than four, then the pattern has not been found + yet and the return value is len. In the latter case, syncsearch() can be + called again with more data and the *have state. *have is initialized to + zero for the first call. + */ +local unsigned syncsearch(unsigned FAR *have, const unsigned char FAR *buf, + unsigned len) { + unsigned got; + unsigned next; + + got = *have; + next = 0; + while (next < len && got < 4) { + if ((int)(buf[next]) == (got < 2 ? 
0 : 0xff)) + got++; + else if (buf[next]) + got = 0; + else + got = 4 - got; + next++; + } + *have = got; + return next; +} + +int ZEXPORT inflateSync(z_streamp strm) { + unsigned len; /* number of bytes to look at or looked at */ + int flags; /* temporary to save header status */ + unsigned long in, out; /* temporary to save total_in and total_out */ + unsigned char buf[4]; /* to restore bit buffer to byte string */ + struct inflate_state FAR *state; + + /* check parameters */ + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; + + /* if first time, start search in bit buffer */ + if (state->mode != SYNC) { + state->mode = SYNC; + state->hold >>= state->bits & 7; + state->bits -= state->bits & 7; + len = 0; + while (state->bits >= 8) { + buf[len++] = (unsigned char)(state->hold); + state->hold >>= 8; + state->bits -= 8; + } + state->have = 0; + syncsearch(&(state->have), buf, len); + } + + /* search available input */ + len = syncsearch(&(state->have), strm->next_in, strm->avail_in); + strm->avail_in -= len; + strm->next_in += len; + strm->total_in += len; + + /* return no joy or set up to restart inflate() on a new block */ + if (state->have != 4) return Z_DATA_ERROR; + if (state->flags == -1) + state->wrap = 0; /* if no header yet, treat as raw */ + else + state->wrap &= ~4; /* no point in computing a check value now */ + flags = state->flags; + in = strm->total_in; out = strm->total_out; + inflateReset(strm); + strm->total_in = in; strm->total_out = out; + state->flags = flags; + state->mode = TYPE; + return Z_OK; +} + +/* + Returns true if inflate is currently at the end of a block generated by + Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP + implementation to provide an additional safety check. PPP uses + Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored + block. When decompressing, PPP checks that at the end of input packet, + inflate is waiting for these length bytes. 
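+
+   An illustrative use (editorial sketch; the surrounding packet
+   handling is invented): after feeding one such packet to inflate(), a
+   receiver could require
+
+     if (!inflateSyncPoint(&strm))
+         ... reject the packet as corrupt ...
+
+   since a conforming sender leaves inflate waiting for exactly those
+   length bytes at each packet boundary.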
+ */ +int ZEXPORT inflateSyncPoint(z_streamp strm) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + return state->mode == STORED && state->bits == 0; +} + +int ZEXPORT inflateCopy(z_streamp dest, z_streamp source) { + struct inflate_state FAR *state; + struct inflate_state FAR *copy; + unsigned char FAR *window; + unsigned wsize; + + /* check input */ + if (inflateStateCheck(source) || dest == Z_NULL) + return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)source->state; + + /* allocate space */ + copy = (struct inflate_state FAR *) + ZALLOC(source, 1, sizeof(struct inflate_state)); + if (copy == Z_NULL) return Z_MEM_ERROR; + window = Z_NULL; + if (state->window != Z_NULL) { + window = (unsigned char FAR *) + ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); + if (window == Z_NULL) { + ZFREE(source, copy); + return Z_MEM_ERROR; + } + } + + /* copy state */ + zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); + zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); + copy->strm = dest; + if (state->lencode >= state->codes && + state->lencode <= state->codes + ENOUGH - 1) { + copy->lencode = copy->codes + (state->lencode - state->codes); + copy->distcode = copy->codes + (state->distcode - state->codes); + } + copy->next = copy->codes + (state->next - state->codes); + if (window != Z_NULL) { + wsize = 1U << state->wbits; + zmemcpy(window, state->window, wsize); + } + copy->window = window; + dest->state = (struct internal_state FAR *)copy; + return Z_OK; +} + +int ZEXPORT inflateUndermine(z_streamp strm, int subvert) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; +#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR + state->sane = !subvert; + return Z_OK; +#else + (void)subvert; + state->sane = 1; + return Z_DATA_ERROR; +#endif +} + +int ZEXPORT inflateValidate(z_streamp strm, int check) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) return Z_STREAM_ERROR; + state = (struct inflate_state FAR *)strm->state; + if (check && state->wrap) + state->wrap |= 4; + else + state->wrap &= ~4; + return Z_OK; +} + +long ZEXPORT inflateMark(z_streamp strm) { + struct inflate_state FAR *state; + + if (inflateStateCheck(strm)) + return -(1L << 16); + state = (struct inflate_state FAR *)strm->state; + return (long)(((unsigned long)((long)state->back)) << 16) + + (state->mode == COPY ? state->length : + (state->mode == MATCH ? state->was - state->length : 0)); +} + +unsigned long ZEXPORT inflateCodesUsed(z_streamp strm) { + struct inflate_state FAR *state; + if (inflateStateCheck(strm)) return (unsigned long)-1; + state = (struct inflate_state FAR *)strm->state; + return (unsigned long)(state->next - state->codes); +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/inflate.h b/c-blosc/internal-complibs/zlib-1.3.1/inflate.h new file mode 100644 index 0000000..f127b6b --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/inflate.h @@ -0,0 +1,126 @@ +/* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2019 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. 
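+
+   (Editorial aside, inferred from inflateStateCheck() in inflate.c: the
+   deliberately unusual base value of the mode enum below, HEAD = 16180,
+   makes the sanity test
+
+     state->mode < HEAD || state->mode > SYNC
+
+   likely to reject a stale or foreign strm->state pointer rather than
+   treat it as valid state.)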
+ */ + +/* define NO_GZIP when compiling if you want to disable gzip header and + trailer decoding by inflate(). NO_GZIP would be used to avoid linking in + the crc code when it is not needed. For shared libraries, gzip decoding + should be left enabled. */ +#ifndef NO_GZIP +# define GUNZIP +#endif + +/* Possible inflate modes between inflate() calls */ +typedef enum { + HEAD = 16180, /* i: waiting for magic header */ + FLAGS, /* i: waiting for method and flags (gzip) */ + TIME, /* i: waiting for modification time (gzip) */ + OS, /* i: waiting for extra flags and operating system (gzip) */ + EXLEN, /* i: waiting for extra length (gzip) */ + EXTRA, /* i: waiting for extra bytes (gzip) */ + NAME, /* i: waiting for end of file name (gzip) */ + COMMENT, /* i: waiting for end of comment (gzip) */ + HCRC, /* i: waiting for header crc (gzip) */ + DICTID, /* i: waiting for dictionary check value */ + DICT, /* waiting for inflateSetDictionary() call */ + TYPE, /* i: waiting for type bits, including last-flag bit */ + TYPEDO, /* i: same, but skip check to exit inflate on new block */ + STORED, /* i: waiting for stored size (length and complement) */ + COPY_, /* i/o: same as COPY below, but only first time in */ + COPY, /* i/o: waiting for input or output to copy stored block */ + TABLE, /* i: waiting for dynamic block table lengths */ + LENLENS, /* i: waiting for code length code lengths */ + CODELENS, /* i: waiting for length/lit and distance code lengths */ + LEN_, /* i: same as LEN below, but only first time in */ + LEN, /* i: waiting for length/lit/eob code */ + LENEXT, /* i: waiting for length extra bits */ + DIST, /* i: waiting for distance code */ + DISTEXT, /* i: waiting for distance extra bits */ + MATCH, /* o: waiting for output space to copy string */ + LIT, /* o: waiting for output space to write literal */ + CHECK, /* i: waiting for 32-bit check value */ + LENGTH, /* i: waiting for 32-bit length (gzip) */ + DONE, /* finished check, done -- remain here until reset */ + BAD, /* got a data error -- remain here until reset */ + MEM, /* got an inflate() memory error -- remain here until reset */ + SYNC /* looking for synchronization bytes to restart inflate() */ +} inflate_mode; + +/* + State transitions between above modes - + + (most modes can go to BAD or MEM on error -- not shown for clarity) + + Process header: + HEAD -> (gzip) or (zlib) or (raw) + (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> + HCRC -> TYPE + (zlib) -> DICTID or TYPE + DICTID -> DICT -> TYPE + (raw) -> TYPEDO + Read deflate blocks: + TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK + STORED -> COPY_ -> COPY -> TYPE + TABLE -> LENLENS -> CODELENS -> LEN_ + LEN_ -> LEN + Read deflate codes in fixed or dynamic block: + LEN -> LENEXT or LIT or TYPE + LENEXT -> DIST -> DISTEXT -> MATCH -> LEN + LIT -> LEN + Process trailer: + CHECK -> LENGTH -> DONE + */ + +/* State maintained between inflate() calls -- approximately 7K bytes, not + including the allocated sliding window, which is up to 32K bytes. 
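+
+   (Editorial arithmetic behind that estimate, assuming the 4-byte code
+   struct and the ENOUGH_LENS = 852, ENOUGH_DISTS = 592 constants from
+   inftrees.h: codes[ENOUGH] is (852 + 592) * 4 = 5776 bytes, and the
+   lens[320] and work[288] shorts add another 1216 bytes, which together
+   account for most of the 7K.)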
*/ +struct inflate_state { + z_streamp strm; /* pointer back to this zlib stream */ + inflate_mode mode; /* current inflate mode */ + int last; /* true if processing last block */ + int wrap; /* bit 0 true for zlib, bit 1 true for gzip, + bit 2 true to validate check value */ + int havedict; /* true if dictionary provided */ + int flags; /* gzip header method and flags, 0 if zlib, or + -1 if raw or no header yet */ + unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ + unsigned long check; /* protected copy of check value */ + unsigned long total; /* protected copy of output count */ + gz_headerp head; /* where to save gzip header information */ + /* sliding window */ + unsigned wbits; /* log base 2 of requested window size */ + unsigned wsize; /* window size or zero if not using window */ + unsigned whave; /* valid bytes in the window */ + unsigned wnext; /* window write index */ + unsigned char FAR *window; /* allocated sliding window, if needed */ + /* bit accumulator */ + unsigned long hold; /* input bit accumulator */ + unsigned bits; /* number of bits in "in" */ + /* for string and stored block copying */ + unsigned length; /* literal or length of data to copy */ + unsigned offset; /* distance back to copy string from */ + /* for table and code decoding */ + unsigned extra; /* extra bits needed */ + /* fixed and dynamic code tables */ + code const FAR *lencode; /* starting table for length/literal codes */ + code const FAR *distcode; /* starting table for distance codes */ + unsigned lenbits; /* index bits for lencode */ + unsigned distbits; /* index bits for distcode */ + /* dynamic table building */ + unsigned ncode; /* number of code length code lengths */ + unsigned nlen; /* number of length code lengths */ + unsigned ndist; /* number of distance code lengths */ + unsigned have; /* number of code lengths in lens[] */ + code FAR *next; /* next available space in codes[] */ + unsigned short lens[320]; /* temporary storage for code lengths */ + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + int sane; /* if false, allow invalid distance too far */ + int back; /* bits back of last unprocessed length/lit */ + unsigned was; /* initial length of match */ +}; diff --git a/c-blosc/internal-complibs/zlib-1.3.1/inftrees.c b/c-blosc/internal-complibs/zlib-1.3.1/inftrees.c new file mode 100644 index 0000000..98cfe16 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/inftrees.c @@ -0,0 +1,299 @@ +/* inftrees.c -- generate Huffman trees for efficient decoding + * Copyright (C) 1995-2024 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +#include "zutil.h" +#include "inftrees.h" + +#define MAXBITS 15 + +const char inflate_copyright[] = + " inflate 1.3.1 Copyright 1995-2024 Mark Adler "; +/* + If you use the zlib library in a product, an acknowledgment is welcome + in the documentation of your product. If for some reason you cannot + include such an acknowledgment, I would appreciate that you keep this + copyright string in the executable of your product. + */ + +/* + Build a set of tables to decode the provided canonical Huffman code. + The code lengths are lens[0..codes-1]. The result starts at *table, + whose indices are 0..2^bits-1. work is a writable array of at least + lens shorts, which is used as a work area. type is the type of code + to be generated, CODES, LENS, or DISTS. 
On return, zero is success, + -1 is an invalid code, and +1 means that ENOUGH isn't enough. table + on return points to the next available entry's address. bits is the + requested root table index bits, and on return it is the actual root + table index bits. It will differ if the request is greater than the + longest code or if it is less than the shortest code. + */ +int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work) { + unsigned len; /* a code's length in bits */ + unsigned sym; /* index of code symbols */ + unsigned min, max; /* minimum and maximum code lengths */ + unsigned root; /* number of index bits for root table */ + unsigned curr; /* number of index bits for current table */ + unsigned drop; /* code bits to drop for sub-table */ + int left; /* number of prefix codes available */ + unsigned used; /* code entries in table used */ + unsigned huff; /* Huffman code */ + unsigned incr; /* for incrementing code, index */ + unsigned fill; /* index for replicating entries */ + unsigned low; /* low bits for current root entry */ + unsigned mask; /* mask for low root bits */ + code here; /* table entry for duplication */ + code FAR *next; /* next available space in table */ + const unsigned short FAR *base; /* base value table to use */ + const unsigned short FAR *extra; /* extra bits table to use */ + unsigned match; /* use base and extra for symbol >= match */ + unsigned short count[MAXBITS+1]; /* number of codes of each length */ + unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ + static const unsigned short lbase[31] = { /* Length codes 257..285 base */ + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, + 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; + static const unsigned short lext[31] = { /* Length codes 257..285 extra */ + 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, + 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 203, 77}; + static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, + 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, + 8193, 12289, 16385, 24577, 0, 0}; + static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ + 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, + 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, + 28, 28, 29, 29, 64, 64}; + + /* + Process a set of code lengths to create a canonical Huffman code. The + code lengths are lens[0..codes-1]. Each length corresponds to the + symbols 0..codes-1. The Huffman code is generated by first sorting the + symbols by length from short to long, and retaining the symbol order + for codes with equal lengths. Then the code starts with all zero bits + for the first code of the shortest length, and the codes are integer + increments for the same length, and zeros are appended as the length + increases. For the deflate format, these bits are stored backwards + from their more natural integer increment ordering, and so when the + decoding tables are built in the large loop below, the integer codes + are incremented backwards. + + This routine assumes, but does not check, that all of the entries in + lens[] are in the range 0..MAXBITS. The caller must assure this. + 1..MAXBITS is interpreted as that code length. zero means that that + symbol does not occur in this code. 
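+
+   A worked example (editorial, following RFC 1951 rather than anything
+   generated here): code lengths {3,3,3,3,3,2,4,4} for symbols 0..7 sort
+   to the order 5, 0, 1, 2, 3, 4, 6, 7 and produce the codes
+
+     symbol 5:      00
+     symbols 0..4:  010 011 100 101 110
+     symbols 6..7:  1110 1111
+
+   where each length's first code is the previous code plus one, shifted
+   left once for each additional bit of length.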
+ + The codes are sorted by computing a count of codes for each length, + creating from that a table of starting indices for each length in the + sorted table, and then entering the symbols in order in the sorted + table. The sorted table is work[], with that space being provided by + the caller. + + The length counts are used for other purposes as well, i.e. finding + the minimum and maximum length codes, determining if there are any + codes at all, checking for a valid set of lengths, and looking ahead + at length counts to determine sub-table sizes when building the + decoding tables. + */ + + /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ + for (len = 0; len <= MAXBITS; len++) + count[len] = 0; + for (sym = 0; sym < codes; sym++) + count[lens[sym]]++; + + /* bound code lengths, force root to be within code lengths */ + root = *bits; + for (max = MAXBITS; max >= 1; max--) + if (count[max] != 0) break; + if (root > max) root = max; + if (max == 0) { /* no symbols to code at all */ + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)1; + here.val = (unsigned short)0; + *(*table)++ = here; /* make a table to force an error */ + *(*table)++ = here; + *bits = 1; + return 0; /* no symbols, but wait for decoding to report error */ + } + for (min = 1; min < max; min++) + if (count[min] != 0) break; + if (root < min) root = min; + + /* check for an over-subscribed or incomplete set of lengths */ + left = 1; + for (len = 1; len <= MAXBITS; len++) { + left <<= 1; + left -= count[len]; + if (left < 0) return -1; /* over-subscribed */ + } + if (left > 0 && (type == CODES || max != 1)) + return -1; /* incomplete set */ + + /* generate offsets into symbol table for each length for sorting */ + offs[1] = 0; + for (len = 1; len < MAXBITS; len++) + offs[len + 1] = offs[len] + count[len]; + + /* sort symbols by length, by symbol order within each length */ + for (sym = 0; sym < codes; sym++) + if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; + + /* + Create and fill in decoding tables. In this loop, the table being + filled is at next and has curr index bits. The code being used is huff + with length len. That code is converted to an index by dropping drop + bits off of the bottom. For codes where len is less than drop + curr, + those top drop + curr - len bits are incremented through all values to + fill the table with replicated entries. + + root is the number of index bits for the root table. When len exceeds + root, sub-tables are created pointed to by the root entry with an index + of the low root bits of huff. This is saved in low to check for when a + new sub-table should be started. drop is zero when the root table is + being filled, and drop is root when sub-tables are being filled. + + When a new sub-table is needed, it is necessary to look ahead in the + code lengths to determine what size sub-table is needed. The length + counts are used for this, and so count[] is decremented as codes are + entered in the tables. + + used keeps track of how many table entries have been allocated from the + provided *table space. It is checked for LENS and DIST tables against + the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in + the initial root table size constants. See the comments in inftrees.h + for more information. + + sym increments through all symbols, and the loop terminates when + all codes of length max, i.e. all codes, have been processed. 
This + routine permits incomplete codes, so another loop after this one fills + in the rest of the decoding tables with invalid code markers. + */ + + /* set up for code type */ + switch (type) { + case CODES: + base = extra = work; /* dummy value--not used */ + match = 20; + break; + case LENS: + base = lbase; + extra = lext; + match = 257; + break; + default: /* DISTS */ + base = dbase; + extra = dext; + match = 0; + } + + /* initialize state for loop */ + huff = 0; /* starting code */ + sym = 0; /* starting code symbol */ + len = min; /* starting code length */ + next = *table; /* current table to fill in */ + curr = root; /* current table index bits */ + drop = 0; /* current bits to drop from code for index */ + low = (unsigned)(-1); /* trigger new sub-table when len > root */ + used = 1U << root; /* use root table entries */ + mask = used - 1; /* mask for comparing low */ + + /* check available table space */ + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* process all codes and make table entries */ + for (;;) { + /* create table entry */ + here.bits = (unsigned char)(len - drop); + if (work[sym] + 1U < match) { + here.op = (unsigned char)0; + here.val = work[sym]; + } + else if (work[sym] >= match) { + here.op = (unsigned char)(extra[work[sym] - match]); + here.val = base[work[sym] - match]; + } + else { + here.op = (unsigned char)(32 + 64); /* end of block */ + here.val = 0; + } + + /* replicate for those indices with low len bits equal to huff */ + incr = 1U << (len - drop); + fill = 1U << curr; + min = fill; /* save offset to next table */ + do { + fill -= incr; + next[(huff >> drop) + fill] = here; + } while (fill != 0); + + /* backwards increment the len-bit code huff */ + incr = 1U << (len - 1); + while (huff & incr) + incr >>= 1; + if (incr != 0) { + huff &= incr - 1; + huff += incr; + } + else + huff = 0; + + /* go to next symbol, update count, len */ + sym++; + if (--(count[len]) == 0) { + if (len == max) break; + len = lens[work[sym]]; + } + + /* create new sub-table if needed */ + if (len > root && (huff & mask) != low) { + /* if first time, transition to sub-tables */ + if (drop == 0) + drop = root; + + /* increment past last table */ + next += min; /* here min is 1 << curr */ + + /* determine length of next table */ + curr = len - drop; + left = (int)(1 << curr); + while (curr + drop < max) { + left -= count[curr + drop]; + if (left <= 0) break; + curr++; + left <<= 1; + } + + /* check for enough space */ + used += 1U << curr; + if ((type == LENS && used > ENOUGH_LENS) || + (type == DISTS && used > ENOUGH_DISTS)) + return 1; + + /* point entry in root table to sub-table */ + low = huff & mask; + (*table)[low].op = (unsigned char)curr; + (*table)[low].bits = (unsigned char)root; + (*table)[low].val = (unsigned short)(next - *table); + } + } + + /* fill in remaining table entry if code is incomplete (guaranteed to have + at most one remaining entry, since if the code is incomplete, the + maximum code length that was allowed to get this far is one bit) */ + if (huff != 0) { + here.op = (unsigned char)64; /* invalid code marker */ + here.bits = (unsigned char)(len - drop); + here.val = (unsigned short)0; + next[huff] = here; + } + + /* set return parameters */ + *table += used; + *bits = root; + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/inftrees.h b/c-blosc/internal-complibs/zlib-1.3.1/inftrees.h new file mode 100644 index 0000000..396f74b --- /dev/null +++ 
b/c-blosc/internal-complibs/zlib-1.3.1/inftrees.h @@ -0,0 +1,62 @@ +/* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005, 2010 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* Structure for decoding tables. Each entry provides either the + information needed to do the operation requested by the code that + indexed that table entry, or it provides a pointer to another + table that indexes more bits of the code. op indicates whether + the entry is a pointer to another table, a literal, a length or + distance, an end-of-block, or an invalid code. For a table + pointer, the low four bits of op is the number of index bits of + that table. For a length or distance, the low four bits of op + is the number of extra bits to get after the code. bits is + the number of bits in this code or part of the code to drop off + of the bit buffer. val is the actual byte to output in the case + of a literal, the base length or distance, or the offset from + the current table to the next table. Each entry is four bytes. */ +typedef struct { + unsigned char op; /* operation, extra bits, table bits */ + unsigned char bits; /* bits in this part of the code */ + unsigned short val; /* offset in table or code value */ +} code; + +/* op values as set by inflate_table(): + 00000000 - literal + 0000tttt - table link, tttt != 0 is the number of table index bits + 0001eeee - length or distance, eeee is the number of extra bits + 01100000 - end of block + 01000000 - invalid code + */ + +/* Maximum size of the dynamic table. The maximum number of code structures is + 1444, which is the sum of 852 for literal/length codes and 592 for distance + codes. These values were found by exhaustive searches using the program + examples/enough.c found in the zlib distribution. The arguments to that + program are the number of symbols, the initial root table size, and the + maximum bit length of a code. "enough 286 9 15" for literal/length codes + returns 852, and "enough 30 6 15" for distance codes returns 592. The + initial root table size (9 or 6) is found in the fifth argument of the + inflate_table() calls in inflate.c and infback.c. If the root table size is + changed, then these maximum sizes would be need to be recalculated and + updated. 
*/ +#define ENOUGH_LENS 852 +#define ENOUGH_DISTS 592 +#define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) + +/* Type of code to build for inflate_table() */ +typedef enum { + CODES, + LENS, + DISTS +} codetype; + +int ZLIB_INTERNAL inflate_table(codetype type, unsigned short FAR *lens, + unsigned codes, code FAR * FAR *table, + unsigned FAR *bits, unsigned short FAR *work); diff --git a/c-blosc/internal-complibs/zlib-1.3.1/test/example.c b/c-blosc/internal-complibs/zlib-1.3.1/test/example.c new file mode 100644 index 0000000..c3521dd --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/test/example.c @@ -0,0 +1,546 @@ +/* example.c -- usage example of the zlib compression library + * Copyright (C) 1995-2006, 2011, 2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#include "zlib.h" +#include + +#ifdef STDC +# include +# include +#endif + +#if defined(VMS) || defined(RISCOS) +# define TESTFILE "foo-gz" +#else +# define TESTFILE "foo.gz" +#endif + +#define CHECK_ERR(err, msg) { \ + if (err != Z_OK) { \ + fprintf(stderr, "%s error: %d\n", msg, err); \ + exit(1); \ + } \ +} + +static z_const char hello[] = "hello, hello!"; +/* "hello world" would be more standard, but the repeated "hello" + * stresses the compression code better, sorry... + */ + +static const char dictionary[] = "hello"; +static uLong dictId; /* Adler32 value of the dictionary */ + +#ifdef Z_SOLO + +static void *myalloc(void *q, unsigned n, unsigned m) { + (void)q; + return calloc(n, m); +} + +static void myfree(void *q, void *p) { + (void)q; + free(p); +} + +static alloc_func zalloc = myalloc; +static free_func zfree = myfree; + +#else /* !Z_SOLO */ + +static alloc_func zalloc = (alloc_func)0; +static free_func zfree = (free_func)0; + +/* =========================================================================== + * Test compress() and uncompress() + */ +static void test_compress(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + uLong len = (uLong)strlen(hello)+1; + + err = compress(compr, &comprLen, (const Bytef*)hello, len); + CHECK_ERR(err, "compress"); + + strcpy((char*)uncompr, "garbage"); + + err = uncompress(uncompr, &uncomprLen, compr, comprLen); + CHECK_ERR(err, "uncompress"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad uncompress\n"); + exit(1); + } else { + printf("uncompress(): %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Test read/write of .gz files + */ +static void test_gzio(const char *fname, Byte *uncompr, uLong uncomprLen) { +#ifdef NO_GZCOMPRESS + fprintf(stderr, "NO_GZCOMPRESS -- gz* functions cannot compress\n"); +#else + int err; + int len = (int)strlen(hello)+1; + gzFile file; + z_off_t pos; + + file = gzopen(fname, "wb"); + if (file == NULL) { + fprintf(stderr, "gzopen error\n"); + exit(1); + } + gzputc(file, 'h'); + if (gzputs(file, "ello") != 4) { + fprintf(stderr, "gzputs err: %s\n", gzerror(file, &err)); + exit(1); + } + if (gzprintf(file, ", %s!", "hello") != 8) { + fprintf(stderr, "gzprintf err: %s\n", gzerror(file, &err)); + exit(1); + } + gzseek(file, 1L, SEEK_CUR); /* add one zero byte */ + gzclose(file); + + file = gzopen(fname, "rb"); + if (file == NULL) { + fprintf(stderr, "gzopen error\n"); + exit(1); + } + strcpy((char*)uncompr, "garbage"); + + if (gzread(file, uncompr, (unsigned)uncomprLen) != len) { + fprintf(stderr, "gzread err: %s\n", gzerror(file, &err)); + exit(1); + } + if 
(strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad gzread: %s\n", (char*)uncompr); + exit(1); + } else { + printf("gzread(): %s\n", (char*)uncompr); + } + + pos = gzseek(file, -8L, SEEK_CUR); + if (pos != 6 || gztell(file) != pos) { + fprintf(stderr, "gzseek error, pos=%ld, gztell=%ld\n", + (long)pos, (long)gztell(file)); + exit(1); + } + + if (gzgetc(file) != ' ') { + fprintf(stderr, "gzgetc error\n"); + exit(1); + } + + if (gzungetc(' ', file) != ' ') { + fprintf(stderr, "gzungetc error\n"); + exit(1); + } + + gzgets(file, (char*)uncompr, (int)uncomprLen); + if (strlen((char*)uncompr) != 7) { /* " hello!" */ + fprintf(stderr, "gzgets err after gzseek: %s\n", gzerror(file, &err)); + exit(1); + } + if (strcmp((char*)uncompr, hello + 6)) { + fprintf(stderr, "bad gzgets after gzseek\n"); + exit(1); + } else { + printf("gzgets() after gzseek: %s\n", (char*)uncompr); + } + + gzclose(file); +#endif +} + +#endif /* Z_SOLO */ + +/* =========================================================================== + * Test deflate() with small buffers + */ +static void test_deflate(Byte *compr, uLong comprLen) { + z_stream c_stream; /* compression stream */ + int err; + uLong len = (uLong)strlen(hello)+1; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_in = (z_const unsigned char *)hello; + c_stream.next_out = compr; + + while (c_stream.total_in != len && c_stream.total_out < comprLen) { + c_stream.avail_in = c_stream.avail_out = 1; /* force small buffers */ + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + } + /* Finish the stream, still forcing small buffers: */ + for (;;) { + c_stream.avail_out = 1; + err = deflate(&c_stream, Z_FINISH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "deflate"); + } + + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with small buffers + */ +static void test_inflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = 0; + d_stream.next_out = uncompr; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + while (d_stream.total_out < uncomprLen && d_stream.total_in < comprLen) { + d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "inflate"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad inflate\n"); + exit(1); + } else { + printf("inflate(): %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Test deflate() with large buffers and dynamic change of compression level + */ +static void test_large_deflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + z_stream c_stream; /* compression stream */ + int err; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_BEST_SPEED); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_out = compr; + c_stream.avail_out = (uInt)comprLen; + + 
/* At this point, uncompr is still mostly zeroes, so it should compress + * very well: + */ + c_stream.next_in = uncompr; + c_stream.avail_in = (uInt)uncomprLen; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + if (c_stream.avail_in != 0) { + fprintf(stderr, "deflate not greedy\n"); + exit(1); + } + + /* Feed in already compressed data and switch to no compression: */ + deflateParams(&c_stream, Z_NO_COMPRESSION, Z_DEFAULT_STRATEGY); + c_stream.next_in = compr; + c_stream.avail_in = (uInt)uncomprLen/2; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + + /* Switch back to compressing mode: */ + deflateParams(&c_stream, Z_BEST_COMPRESSION, Z_FILTERED); + c_stream.next_in = uncompr; + c_stream.avail_in = (uInt)uncomprLen; + err = deflate(&c_stream, Z_NO_FLUSH); + CHECK_ERR(err, "deflate"); + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "deflate should report Z_STREAM_END\n"); + exit(1); + } + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with large buffers + */ +static void test_large_inflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = (uInt)comprLen; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + for (;;) { + d_stream.next_out = uncompr; /* discard the output */ + d_stream.avail_out = (uInt)uncomprLen; + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + CHECK_ERR(err, "large inflate"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (d_stream.total_out != 2*uncomprLen + uncomprLen/2) { + fprintf(stderr, "bad large inflate: %ld\n", d_stream.total_out); + exit(1); + } else { + printf("large_inflate(): OK\n"); + } +} + +/* =========================================================================== + * Test deflate() with full flush + */ +static void test_flush(Byte *compr, uLong *comprLen) { + z_stream c_stream; /* compression stream */ + int err; + uInt len = (uInt)strlen(hello)+1; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_DEFAULT_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + c_stream.next_in = (z_const unsigned char *)hello; + c_stream.next_out = compr; + c_stream.avail_in = 3; + c_stream.avail_out = (uInt)*comprLen; + err = deflate(&c_stream, Z_FULL_FLUSH); + CHECK_ERR(err, "deflate"); + + compr[3]++; /* force an error in first compressed block */ + c_stream.avail_in = len - 3; + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + CHECK_ERR(err, "deflate"); + } + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); + + *comprLen = c_stream.total_out; +} + +/* =========================================================================== + * Test inflateSync() + */ +static void test_sync(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = 2; /* just read the zlib header */ + + err = inflateInit(&d_stream); + 
CHECK_ERR(err, "inflateInit"); + + d_stream.next_out = uncompr; + d_stream.avail_out = (uInt)uncomprLen; + + err = inflate(&d_stream, Z_NO_FLUSH); + CHECK_ERR(err, "inflate"); + + d_stream.avail_in = (uInt)comprLen-2; /* read all compressed data */ + err = inflateSync(&d_stream); /* but skip the damaged part */ + CHECK_ERR(err, "inflateSync"); + + err = inflate(&d_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "inflate should report Z_STREAM_END\n"); + exit(1); + } + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + printf("after inflateSync(): hel%s\n", (char *)uncompr); +} + +/* =========================================================================== + * Test deflate() with preset dictionary + */ +static void test_dict_deflate(Byte *compr, uLong comprLen) { + z_stream c_stream; /* compression stream */ + int err; + + c_stream.zalloc = zalloc; + c_stream.zfree = zfree; + c_stream.opaque = (voidpf)0; + + err = deflateInit(&c_stream, Z_BEST_COMPRESSION); + CHECK_ERR(err, "deflateInit"); + + err = deflateSetDictionary(&c_stream, + (const Bytef*)dictionary, (int)sizeof(dictionary)); + CHECK_ERR(err, "deflateSetDictionary"); + + dictId = c_stream.adler; + c_stream.next_out = compr; + c_stream.avail_out = (uInt)comprLen; + + c_stream.next_in = (z_const unsigned char *)hello; + c_stream.avail_in = (uInt)strlen(hello)+1; + + err = deflate(&c_stream, Z_FINISH); + if (err != Z_STREAM_END) { + fprintf(stderr, "deflate should report Z_STREAM_END\n"); + exit(1); + } + err = deflateEnd(&c_stream); + CHECK_ERR(err, "deflateEnd"); +} + +/* =========================================================================== + * Test inflate() with a preset dictionary + */ +static void test_dict_inflate(Byte *compr, uLong comprLen, Byte *uncompr, + uLong uncomprLen) { + int err; + z_stream d_stream; /* decompression stream */ + + strcpy((char*)uncompr, "garbage"); + + d_stream.zalloc = zalloc; + d_stream.zfree = zfree; + d_stream.opaque = (voidpf)0; + + d_stream.next_in = compr; + d_stream.avail_in = (uInt)comprLen; + + err = inflateInit(&d_stream); + CHECK_ERR(err, "inflateInit"); + + d_stream.next_out = uncompr; + d_stream.avail_out = (uInt)uncomprLen; + + for (;;) { + err = inflate(&d_stream, Z_NO_FLUSH); + if (err == Z_STREAM_END) break; + if (err == Z_NEED_DICT) { + if (d_stream.adler != dictId) { + fprintf(stderr, "unexpected dictionary"); + exit(1); + } + err = inflateSetDictionary(&d_stream, (const Bytef*)dictionary, + (int)sizeof(dictionary)); + } + CHECK_ERR(err, "inflate with dict"); + } + + err = inflateEnd(&d_stream); + CHECK_ERR(err, "inflateEnd"); + + if (strcmp((char*)uncompr, hello)) { + fprintf(stderr, "bad inflate with dict\n"); + exit(1); + } else { + printf("inflate with dictionary: %s\n", (char *)uncompr); + } +} + +/* =========================================================================== + * Usage: example [output.gz [input.gz]] + */ + +int main(int argc, char *argv[]) { + Byte *compr, *uncompr; + uLong uncomprLen = 20000; + uLong comprLen = 3 * uncomprLen; + static const char* myVersion = ZLIB_VERSION; + + if (zlibVersion()[0] != myVersion[0]) { + fprintf(stderr, "incompatible zlib version\n"); + exit(1); + + } else if (strcmp(zlibVersion(), ZLIB_VERSION) != 0) { + fprintf(stderr, "warning: different zlib version linked: %s\n", + zlibVersion()); + } + + printf("zlib version %s = 0x%04x, compile flags = 0x%lx\n", + ZLIB_VERSION, ZLIB_VERNUM, zlibCompileFlags()); + + compr = (Byte*)calloc((uInt)comprLen, 1); + uncompr = (Byte*)calloc((uInt)uncomprLen, 
1); + /* compr and uncompr are cleared to avoid reading uninitialized + * data and to ensure that uncompr compresses well. + */ + if (compr == Z_NULL || uncompr == Z_NULL) { + printf("out of memory\n"); + exit(1); + } + +#ifdef Z_SOLO + (void)argc; + (void)argv; +#else + test_compress(compr, comprLen, uncompr, uncomprLen); + + test_gzio((argc > 1 ? argv[1] : TESTFILE), + uncompr, uncomprLen); +#endif + + test_deflate(compr, comprLen); + test_inflate(compr, comprLen, uncompr, uncomprLen); + + test_large_deflate(compr, comprLen, uncompr, uncomprLen); + test_large_inflate(compr, comprLen, uncompr, uncomprLen); + + test_flush(compr, &comprLen); + test_sync(compr, comprLen, uncompr, uncomprLen); + comprLen = 3 * uncomprLen; + + test_dict_deflate(compr, comprLen); + test_dict_inflate(compr, comprLen, uncompr, uncomprLen); + + free(compr); + free(uncompr); + + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/test/infcover.c b/c-blosc/internal-complibs/zlib-1.3.1/test/infcover.c new file mode 100644 index 0000000..8912c40 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/test/infcover.c @@ -0,0 +1,672 @@ +/* infcover.c -- test zlib's inflate routines with full code coverage + * Copyright (C) 2011, 2016 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* to use, do: ./configure --cover && make cover */ + +#include +#include +#include +#include +#include "zlib.h" + +/* get definition of internal structure so we can mess with it (see pull()), + and so we can call inflate_trees() (see cover5()) */ +#define ZLIB_INTERNAL +#include "inftrees.h" +#include "inflate.h" + +#define local static + +/* -- memory tracking routines -- */ + +/* + These memory tracking routines are provided to zlib and track all of zlib's + allocations and deallocations, check for LIFO operations, keep a current + and high water mark of total bytes requested, optionally set a limit on the + total memory that can be allocated, and when done check for memory leaks. + + They are used as follows: + + z_stream strm; + mem_setup(&strm) initializes the memory tracking and sets the + zalloc, zfree, and opaque members of strm to use + memory tracking for all zlib operations on strm + mem_limit(&strm, limit) sets a limit on the total bytes requested -- a + request that exceeds this limit will result in an + allocation failure (returns NULL) -- setting the + limit to zero means no limit, which is the default + after mem_setup() + mem_used(&strm, "msg") prints to stderr "msg" and the total bytes used + mem_high(&strm, "msg") prints to stderr "msg" and the high water mark + mem_done(&strm, "msg") ends memory tracking, releases all allocations + for the tracking as well as leaked zlib blocks, if + any. If there was anything unusual, such as leaked + blocks, non-FIFO frees, or frees of addresses not + allocated, then "msg" and information about the + problem is printed to stderr. If everything is + normal, nothing is printed. mem_done resets the + strm members to Z_NULL to use the default memory + allocation routines on the next zlib initialization + using strm. 
+ */ + +/* these items are strung together in a linked list, one for each allocation */ +struct mem_item { + void *ptr; /* pointer to allocated memory */ + size_t size; /* requested size of allocation */ + struct mem_item *next; /* pointer to next item in list, or NULL */ +}; + +/* this structure is at the root of the linked list, and tracks statistics */ +struct mem_zone { + struct mem_item *first; /* pointer to first item in list, or NULL */ + size_t total, highwater; /* total allocations, and largest total */ + size_t limit; /* memory allocation limit, or 0 if no limit */ + int notlifo, rogue; /* counts of non-LIFO frees and rogue frees */ +}; + +/* memory allocation routine to pass to zlib */ +local void *mem_alloc(void *mem, unsigned count, unsigned size) +{ + void *ptr; + struct mem_item *item; + struct mem_zone *zone = mem; + size_t len = count * (size_t)size; + + /* induced allocation failure */ + if (zone == NULL || (zone->limit && zone->total + len > zone->limit)) + return NULL; + + /* perform allocation using the standard library, fill memory with a + non-zero value to make sure that the code isn't depending on zeros */ + ptr = malloc(len); + if (ptr == NULL) + return NULL; + memset(ptr, 0xa5, len); + + /* create a new item for the list */ + item = malloc(sizeof(struct mem_item)); + if (item == NULL) { + free(ptr); + return NULL; + } + item->ptr = ptr; + item->size = len; + + /* insert item at the beginning of the list */ + item->next = zone->first; + zone->first = item; + + /* update the statistics */ + zone->total += item->size; + if (zone->total > zone->highwater) + zone->highwater = zone->total; + + /* return the allocated memory */ + return ptr; +} + +/* memory free routine to pass to zlib */ +local void mem_free(void *mem, void *ptr) +{ + struct mem_item *item, *next; + struct mem_zone *zone = mem; + + /* if no zone, just do a free */ + if (zone == NULL) { + free(ptr); + return; + } + + /* point next to the item that matches ptr, or NULL if not found -- remove + the item from the linked list if found */ + next = zone->first; + if (next) { + if (next->ptr == ptr) + zone->first = next->next; /* first one is it, remove from list */ + else { + do { /* search the linked list */ + item = next; + next = item->next; + } while (next != NULL && next->ptr != ptr); + if (next) { /* if found, remove from linked list */ + item->next = next->next; + zone->notlifo++; /* not a LIFO free */ + } + + } + } + + /* if found, update the statistics and free the item */ + if (next) { + zone->total -= next->size; + free(next); + } + + /* if not found, update the rogue count */ + else + zone->rogue++; + + /* in any case, do the requested free with the standard library function */ + free(ptr); +} + +/* set up a controlled memory allocation space for monitoring, set the stream + parameters to the controlled routines, with opaque pointing to the space */ +local void mem_setup(z_stream *strm) +{ + struct mem_zone *zone; + + zone = malloc(sizeof(struct mem_zone)); + assert(zone != NULL); + zone->first = NULL; + zone->total = 0; + zone->highwater = 0; + zone->limit = 0; + zone->notlifo = 0; + zone->rogue = 0; + strm->opaque = zone; + strm->zalloc = mem_alloc; + strm->zfree = mem_free; +} + +/* set a limit on the total memory allocation, or 0 to remove the limit */ +local void mem_limit(z_stream *strm, size_t limit) +{ + struct mem_zone *zone = strm->opaque; + + zone->limit = limit; +} + +/* show the current total requested allocations in bytes */ +local void mem_used(z_stream *strm, char *prefix) +{ + 
struct mem_zone *zone = strm->opaque; + + fprintf(stderr, "%s: %lu allocated\n", prefix, zone->total); +} + +/* show the high water allocation in bytes */ +local void mem_high(z_stream *strm, char *prefix) +{ + struct mem_zone *zone = strm->opaque; + + fprintf(stderr, "%s: %lu high water mark\n", prefix, zone->highwater); +} + +/* release the memory allocation zone -- if there are any surprises, notify */ +local void mem_done(z_stream *strm, char *prefix) +{ + int count = 0; + struct mem_item *item, *next; + struct mem_zone *zone = strm->opaque; + + /* show high water mark */ + mem_high(strm, prefix); + + /* free leftover allocations and item structures, if any */ + item = zone->first; + while (item != NULL) { + free(item->ptr); + next = item->next; + free(item); + item = next; + count++; + } + + /* issue alerts about anything unexpected */ + if (count || zone->total) + fprintf(stderr, "** %s: %lu bytes in %d blocks not freed\n", + prefix, zone->total, count); + if (zone->notlifo) + fprintf(stderr, "** %s: %d frees not LIFO\n", prefix, zone->notlifo); + if (zone->rogue) + fprintf(stderr, "** %s: %d frees not recognized\n", + prefix, zone->rogue); + + /* free the zone and delete from the stream */ + free(zone); + strm->opaque = Z_NULL; + strm->zalloc = Z_NULL; + strm->zfree = Z_NULL; +} + +/* -- inflate test routines -- */ + +/* Decode a hexadecimal string, set *len to length, in[] to the bytes. This + decodes liberally, in that hex digits can be adjacent, in which case two in + a row writes a byte. Or they can be delimited by any non-hex character, + where the delimiters are ignored except when a single hex digit is followed + by a delimiter, where that single digit writes a byte. The returned data is + allocated and must eventually be freed. NULL is returned if out of memory. + If the length is not needed, then len can be NULL. */ +local unsigned char *h2b(const char *hex, unsigned *len) +{ + unsigned char *in, *re; + unsigned next, val; + + in = malloc((strlen(hex) + 1) >> 1); + if (in == NULL) + return NULL; + next = 0; + val = 1; + do { + if (*hex >= '0' && *hex <= '9') + val = (val << 4) + *hex - '0'; + else if (*hex >= 'A' && *hex <= 'F') + val = (val << 4) + *hex - 'A' + 10; + else if (*hex >= 'a' && *hex <= 'f') + val = (val << 4) + *hex - 'a' + 10; + else if (val != 1 && val < 32) /* one digit followed by delimiter */ + val += 240; /* make it look like two digits */ + if (val > 255) { /* have two digits */ + in[next++] = val & 0xff; /* save the decoded byte */ + val = 1; /* start over */ + } + } while (*hex++); /* go through the loop with the terminating null */ + if (len != NULL) + *len = next; + re = realloc(in, next); + return re == NULL ? in : re; +} + +/* generic inflate() run, where hex is the hexadecimal input data, what is the + text to include in an error message, step is how much input data to feed + inflate() on each call, or zero to feed it all, win is the window bits + parameter to inflateInit2(), len is the size of the output buffer, and err + is the error code expected from the first inflate() call (the second + inflate() call is expected to return Z_STREAM_END). If win is 47, then + header information is collected with inflateGetHeader(). If a zlib stream + is looking for a dictionary, then an empty dictionary is provided. + inflate() is run until all of the input data is consumed. 
*/ +local void inf(char *hex, char *what, unsigned step, int win, unsigned len, + int err) +{ + int ret; + unsigned have; + unsigned char *in, *out; + z_stream strm, copy; + gz_header head; + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, win); + if (ret != Z_OK) { + mem_done(&strm, what); + return; + } + out = malloc(len); assert(out != NULL); + if (win == 47) { + head.extra = out; + head.extra_max = len; + head.name = out; + head.name_max = len; + head.comment = out; + head.comm_max = len; + ret = inflateGetHeader(&strm, &head); assert(ret == Z_OK); + } + in = h2b(hex, &have); assert(in != NULL); + if (step == 0 || step > have) + step = have; + strm.avail_in = step; + have -= step; + strm.next_in = in; + do { + strm.avail_out = len; + strm.next_out = out; + ret = inflate(&strm, Z_NO_FLUSH); assert(err == 9 || ret == err); + if (ret != Z_OK && ret != Z_BUF_ERROR && ret != Z_NEED_DICT) + break; + if (ret == Z_NEED_DICT) { + ret = inflateSetDictionary(&strm, in, 1); + assert(ret == Z_DATA_ERROR); + mem_limit(&strm, 1); + ret = inflateSetDictionary(&strm, out, 0); + assert(ret == Z_MEM_ERROR); + mem_limit(&strm, 0); + ((struct inflate_state *)strm.state)->mode = DICT; + ret = inflateSetDictionary(&strm, out, 0); + assert(ret == Z_OK); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_BUF_ERROR); + } + ret = inflateCopy(©, &strm); assert(ret == Z_OK); + ret = inflateEnd(©); assert(ret == Z_OK); + err = 9; /* don't care next time around */ + have += strm.avail_in; + strm.avail_in = step > have ? have : step; + have -= strm.avail_in; + } while (strm.avail_in); + free(in); + free(out); + ret = inflateReset2(&strm, -8); assert(ret == Z_OK); + ret = inflateEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, what); +} + +/* cover all of the lines in inflate.c up to inflate() */ +local void cover_support(void) +{ + int ret; + z_stream strm; + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); assert(ret == Z_OK); + mem_used(&strm, "inflate init"); + ret = inflatePrime(&strm, 5, 31); assert(ret == Z_OK); + ret = inflatePrime(&strm, -1, 0); assert(ret == Z_OK); + ret = inflateSetDictionary(&strm, Z_NULL, 0); + assert(ret == Z_STREAM_ERROR); + ret = inflateEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, "prime"); + + inf("63 0", "force window allocation", 0, -15, 1, Z_OK); + inf("63 18 5", "force window replacement", 0, -8, 259, Z_OK); + inf("63 18 68 30 d0 0 0", "force split window update", 4, -8, 259, Z_OK); + inf("3 0", "use fixed blocks", 0, -15, 1, Z_STREAM_END); + inf("", "bad window size", 0, 1, 0, Z_STREAM_ERROR); + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit_(&strm, "!", (int)sizeof(z_stream)); + assert(ret == Z_VERSION_ERROR); + mem_done(&strm, "wrong version"); + + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit(&strm); assert(ret == Z_OK); + ret = inflateEnd(&strm); assert(ret == Z_OK); + fputs("inflate built-in memory routines\n", stderr); +} + +/* cover all inflate() header and trailer cases and code after inflate() */ +local void cover_wrap(void) +{ + int ret; + z_stream strm, copy; + unsigned char dict[257]; + + ret = inflate(Z_NULL, 0); assert(ret == Z_STREAM_ERROR); + ret = inflateEnd(Z_NULL); assert(ret == Z_STREAM_ERROR); + ret = inflateCopy(Z_NULL, Z_NULL); assert(ret == Z_STREAM_ERROR); + fputs("inflate bad parameters\n", stderr); + + inf("1f 8b 0 0", "bad gzip method", 0, 31, 0, Z_DATA_ERROR); + inf("1f 8b 8 80", "bad gzip flags", 
0, 31, 0, Z_DATA_ERROR); + inf("77 85", "bad zlib method", 0, 15, 0, Z_DATA_ERROR); + inf("8 99", "set window size from header", 0, 0, 0, Z_OK); + inf("78 9c", "bad zlib window size", 0, 8, 0, Z_DATA_ERROR); + inf("78 9c 63 0 0 0 1 0 1", "check adler32", 0, 15, 1, Z_STREAM_END); + inf("1f 8b 8 1e 0 0 0 0 0 0 1 0 0 0 0 0 0", "bad header crc", 0, 47, 1, + Z_DATA_ERROR); + inf("1f 8b 8 2 0 0 0 0 0 0 1d 26 3 0 0 0 0 0 0 0 0 0", "check gzip length", + 0, 47, 0, Z_STREAM_END); + inf("78 90", "bad zlib header check", 0, 47, 0, Z_DATA_ERROR); + inf("8 b8 0 0 0 1", "need dictionary", 0, 8, 0, Z_NEED_DICT); + inf("78 9c 63 0", "compute adler32", 0, 15, 1, Z_OK); + + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -8); + strm.avail_in = 2; + strm.next_in = (void *)"\x63"; + strm.avail_out = 1; + strm.next_out = (void *)&ret; + mem_limit(&strm, 1); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_MEM_ERROR); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_MEM_ERROR); + mem_limit(&strm, 0); + memset(dict, 0, 257); + ret = inflateSetDictionary(&strm, dict, 257); + assert(ret == Z_OK); + mem_limit(&strm, (sizeof(struct inflate_state) << 1) + 256); + ret = inflatePrime(&strm, 16, 0); assert(ret == Z_OK); + strm.avail_in = 2; + strm.next_in = (void *)"\x80"; + ret = inflateSync(&strm); assert(ret == Z_DATA_ERROR); + ret = inflate(&strm, Z_NO_FLUSH); assert(ret == Z_STREAM_ERROR); + strm.avail_in = 4; + strm.next_in = (void *)"\0\0\xff\xff"; + ret = inflateSync(&strm); assert(ret == Z_OK); + (void)inflateSyncPoint(&strm); + ret = inflateCopy(©, &strm); assert(ret == Z_MEM_ERROR); + mem_limit(&strm, 0); + ret = inflateUndermine(&strm, 1); assert(ret == Z_DATA_ERROR); + (void)inflateMark(&strm); + ret = inflateEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, "miscellaneous, force memory errors"); +} + +/* input and output functions for inflateBack() */ +local unsigned pull(void *desc, unsigned char **buf) +{ + static unsigned int next = 0; + static unsigned char dat[] = {0x63, 0, 2, 0}; + struct inflate_state *state; + + if (desc == Z_NULL) { + next = 0; + return 0; /* no input (already provided at next_in) */ + } + state = (void *)((z_stream *)desc)->state; + if (state != Z_NULL) + state->mode = SYNC; /* force an otherwise impossible situation */ + return next < sizeof(dat) ? 
(*buf = dat + next++, 1) : 0; +} + +local int push(void *desc, unsigned char *buf, unsigned len) +{ + (void)buf; + (void)len; + return desc != Z_NULL; /* force error if desc not null */ +} + +/* cover inflateBack() up to common deflate data cases and after those */ +local void cover_back(void) +{ + int ret; + z_stream strm; + unsigned char win[32768]; + + ret = inflateBackInit_(Z_NULL, 0, win, 0, 0); + assert(ret == Z_VERSION_ERROR); + ret = inflateBackInit(Z_NULL, 0, win); assert(ret == Z_STREAM_ERROR); + ret = inflateBack(Z_NULL, Z_NULL, Z_NULL, Z_NULL, Z_NULL); + assert(ret == Z_STREAM_ERROR); + ret = inflateBackEnd(Z_NULL); assert(ret == Z_STREAM_ERROR); + fputs("inflateBack bad parameters\n", stderr); + + mem_setup(&strm); + ret = inflateBackInit(&strm, 15, win); assert(ret == Z_OK); + strm.avail_in = 2; + strm.next_in = (void *)"\x03"; + ret = inflateBack(&strm, pull, Z_NULL, push, Z_NULL); + assert(ret == Z_STREAM_END); + /* force output error */ + strm.avail_in = 3; + strm.next_in = (void *)"\x63\x00"; + ret = inflateBack(&strm, pull, Z_NULL, push, &strm); + assert(ret == Z_BUF_ERROR); + /* force mode error by mucking with state */ + ret = inflateBack(&strm, pull, &strm, push, Z_NULL); + assert(ret == Z_STREAM_ERROR); + ret = inflateBackEnd(&strm); assert(ret == Z_OK); + mem_done(&strm, "inflateBack bad state"); + + ret = inflateBackInit(&strm, 15, win); assert(ret == Z_OK); + ret = inflateBackEnd(&strm); assert(ret == Z_OK); + fputs("inflateBack built-in memory routines\n", stderr); +} + +/* do a raw inflate of data in hexadecimal with both inflate and inflateBack */ +local int try(char *hex, char *id, int err) +{ + int ret; + unsigned len, size; + unsigned char *in, *out, *win; + char *prefix; + z_stream strm; + + /* convert to hex */ + in = h2b(hex, &len); + assert(in != NULL); + + /* allocate work areas */ + size = len << 3; + out = malloc(size); + assert(out != NULL); + win = malloc(32768); + assert(win != NULL); + prefix = malloc(strlen(id) + 6); + assert(prefix != NULL); + + /* first with inflate */ + strcpy(prefix, id); + strcat(prefix, "-late"); + mem_setup(&strm); + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, err < 0 ? 
47 : -15); + assert(ret == Z_OK); + strm.avail_in = len; + strm.next_in = in; + do { + strm.avail_out = size; + strm.next_out = out; + ret = inflate(&strm, Z_TREES); + assert(ret != Z_STREAM_ERROR && ret != Z_MEM_ERROR); + if (ret == Z_DATA_ERROR || ret == Z_NEED_DICT) + break; + } while (strm.avail_in || strm.avail_out == 0); + if (err) { + assert(ret == Z_DATA_ERROR); + assert(strcmp(id, strm.msg) == 0); + } + inflateEnd(&strm); + mem_done(&strm, prefix); + + /* then with inflateBack */ + if (err >= 0) { + strcpy(prefix, id); + strcat(prefix, "-back"); + mem_setup(&strm); + ret = inflateBackInit(&strm, 15, win); + assert(ret == Z_OK); + strm.avail_in = len; + strm.next_in = in; + ret = inflateBack(&strm, pull, Z_NULL, push, Z_NULL); + assert(ret != Z_STREAM_ERROR); + if (err) { + assert(ret == Z_DATA_ERROR); + assert(strcmp(id, strm.msg) == 0); + } + inflateBackEnd(&strm); + mem_done(&strm, prefix); + } + + /* clean up */ + free(prefix); + free(win); + free(out); + free(in); + return ret; +} + +/* cover deflate data cases in both inflate() and inflateBack() */ +local void cover_inflate(void) +{ + try("0 0 0 0 0", "invalid stored block lengths", 1); + try("3 0", "fixed", 0); + try("6", "invalid block type", 1); + try("1 1 0 fe ff 0", "stored", 0); + try("fc 0 0", "too many length or distance symbols", 1); + try("4 0 fe ff", "invalid code lengths set", 1); + try("4 0 24 49 0", "invalid bit length repeat", 1); + try("4 0 24 e9 ff ff", "invalid bit length repeat", 1); + try("4 0 24 e9 ff 6d", "invalid code -- missing end-of-block", 1); + try("4 80 49 92 24 49 92 24 71 ff ff 93 11 0", + "invalid literal/lengths set", 1); + try("4 80 49 92 24 49 92 24 f b4 ff ff c3 84", "invalid distances set", 1); + try("4 c0 81 8 0 0 0 0 20 7f eb b 0 0", "invalid literal/length code", 1); + try("2 7e ff ff", "invalid distance code", 1); + try("c c0 81 0 0 0 0 0 90 ff 6b 4 0", "invalid distance too far back", 1); + + /* also trailer mismatch just in inflate() */ + try("1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 1", "incorrect data check", -1); + try("1f 8b 8 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 1", + "incorrect length check", -1); + try("5 c0 21 d 0 0 0 80 b0 fe 6d 2f 91 6c", "pull 17", 0); + try("5 e0 81 91 24 cb b2 2c 49 e2 f 2e 8b 9a 47 56 9f fb fe ec d2 ff 1f", + "long code", 0); + try("ed c0 1 1 0 0 0 40 20 ff 57 1b 42 2c 4f", "length extra", 0); + try("ed cf c1 b1 2c 47 10 c4 30 fa 6f 35 1d 1 82 59 3d fb be 2e 2a fc f c", + "long distance and extra", 0); + try("ed c0 81 0 0 0 0 80 a0 fd a9 17 a9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " + "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6", "window end", 0); + inf("2 8 20 80 0 3 0", "inflate_fast TYPE return", 0, -15, 258, + Z_STREAM_END); + inf("63 18 5 40 c 0", "window wrap", 3, -8, 300, Z_OK); +} + +/* cover remaining lines in inftrees.c */ +local void cover_trees(void) +{ + int ret; + unsigned bits; + unsigned short lens[16], work[16]; + code *next, table[ENOUGH_DISTS]; + + /* we need to call inflate_table() directly in order to manifest not- + enough errors, since zlib insures that enough is always enough */ + for (bits = 0; bits < 15; bits++) + lens[bits] = (unsigned short)(bits + 1); + lens[15] = 15; + next = table; + bits = 15; + ret = inflate_table(DISTS, lens, 16, &next, &bits, work); + assert(ret == 1); + next = table; + bits = 1; + ret = inflate_table(DISTS, lens, 16, &next, &bits, work); + assert(ret == 1); + fputs("inflate_table not enough errors\n", stderr); +} + +/* cover remaining inffast.c decoding and window copying */ +local void cover_fast(void) +{ + inf("e5 e0 81 ad 6d 
cb b2 2c c9 01 1e 59 63 ae 7d ee fb 4d fd b5 35 41 68" + " ff 7f 0f 0 0 0", "fast length extra bits", 0, -8, 258, Z_DATA_ERROR); + inf("25 fd 81 b5 6d 59 b6 6a 49 ea af 35 6 34 eb 8c b9 f6 b9 1e ef 67 49" + " 50 fe ff ff 3f 0 0", "fast distance extra bits", 0, -8, 258, + Z_DATA_ERROR); + inf("3 7e 0 0 0 0 0", "fast invalid distance code", 0, -8, 258, + Z_DATA_ERROR); + inf("1b 7 0 0 0 0 0", "fast invalid literal/length code", 0, -8, 258, + Z_DATA_ERROR); + inf("d c7 1 ae eb 38 c 4 41 a0 87 72 de df fb 1f b8 36 b1 38 5d ff ff 0", + "fast 2nd level codes and too far back", 0, -8, 258, Z_DATA_ERROR); + inf("63 18 5 8c 10 8 0 0 0 0", "very common case", 0, -8, 259, Z_OK); + inf("63 60 60 18 c9 0 8 18 18 18 26 c0 28 0 29 0 0 0", + "contiguous and wrap around window", 6, -8, 259, Z_OK); + inf("63 0 3 0 0 0 0 0", "copy direct from output", 0, -8, 259, + Z_STREAM_END); +} + +int main(void) +{ + fprintf(stderr, "%s\n", zlibVersion()); + cover_support(); + cover_wrap(); + cover_back(); + cover_inflate(); + cover_trees(); + cover_fast(); + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/test/minigzip.c b/c-blosc/internal-complibs/zlib-1.3.1/test/minigzip.c new file mode 100644 index 0000000..134e10e --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/test/minigzip.c @@ -0,0 +1,579 @@ +/* minigzip.c -- simulate gzip using the zlib compression library + * Copyright (C) 1995-2006, 2010, 2011, 2016 Jean-loup Gailly + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * minigzip is a minimal implementation of the gzip utility. This is + * only an example of using zlib and isn't meant to replace the + * full-featured gzip. No attempt is made to deal with file systems + * limiting names to 14 or 8+3 characters, etc... Error checking is + * very limited. So use minigzip only for testing; use gzip for the + * real thing. On MSDOS, use only on file names without extension + * or in pipe mode. + */ + +/* @(#) $Id$ */ + +#include "zlib.h" +#include + +#ifdef STDC +# include +# include +#endif + +#ifdef USE_MMAP +# include +# include +# include +#endif + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) +# include +# include +# ifdef UNDER_CE +# include +# endif +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +#else +# define SET_BINARY_MODE(file) +#endif + +#if defined(_MSC_VER) && _MSC_VER < 1900 +# define snprintf _snprintf +#endif + +#ifdef VMS +# define unlink delete +# define GZ_SUFFIX "-gz" +#endif +#ifdef RISCOS +# define unlink remove +# define GZ_SUFFIX "-gz" +# define fileno(file) file->__file +#endif +#if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os +# include /* for fileno */ +#endif + +#if !defined(Z_HAVE_UNISTD_H) && !defined(_LARGEFILE64_SOURCE) +#ifndef WIN32 /* unlink already in stdio.h for WIN32 */ + extern int unlink(const char *); +#endif +#endif + +#if defined(UNDER_CE) +# include +# define perror(s) pwinerror(s) + +/* Map the Windows error number in ERROR to a locale-dependent error + message string and return a pointer to it. Typically, the values + for ERROR come from GetLastError. + + The string pointed to shall not be modified by the application, + but may be overwritten by a subsequent call to strwinerror + + The strwinerror function does not change the current setting + of GetLastError. 
*/ + +static char *strwinerror (error) + DWORD error; +{ + static char buf[1024]; + + wchar_t *msgbuf; + DWORD lasterr = GetLastError(); + DWORD chars = FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM + | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, + error, + 0, /* Default language */ + (LPVOID)&msgbuf, + 0, + NULL); + if (chars != 0) { + /* If there is an \r\n appended, zap it. */ + if (chars >= 2 + && msgbuf[chars - 2] == '\r' && msgbuf[chars - 1] == '\n') { + chars -= 2; + msgbuf[chars] = 0; + } + + if (chars > sizeof (buf) - 1) { + chars = sizeof (buf) - 1; + msgbuf[chars] = 0; + } + + wcstombs(buf, msgbuf, chars + 1); + LocalFree(msgbuf); + } + else { + sprintf(buf, "unknown win32 error (%ld)", error); + } + + SetLastError(lasterr); + return buf; +} + +static void pwinerror (s) + const char *s; +{ + if (s && *s) + fprintf(stderr, "%s: %s\n", s, strwinerror(GetLastError ())); + else + fprintf(stderr, "%s\n", strwinerror(GetLastError ())); +} + +#endif /* UNDER_CE */ + +#ifndef GZ_SUFFIX +# define GZ_SUFFIX ".gz" +#endif +#define SUFFIX_LEN (sizeof(GZ_SUFFIX)-1) + +#define BUFLEN 16384 +#define MAX_NAME_LEN 1024 + +#ifdef MAXSEG_64K +# define local static + /* Needed for systems with limitation on stack size. */ +#else +# define local +#endif + +#ifdef Z_SOLO +/* for Z_SOLO, create simplified gz* functions using deflate and inflate */ + +#if defined(Z_HAVE_UNISTD_H) || defined(Z_LARGE) +# include /* for unlink() */ +#endif + +static void *myalloc(void *q, unsigned n, unsigned m) { + (void)q; + return calloc(n, m); +} + +static void myfree(void *q, void *p) { + (void)q; + free(p); +} + +typedef struct gzFile_s { + FILE *file; + int write; + int err; + char *msg; + z_stream strm; +} *gzFile; + +static gzFile gz_open(const char *path, int fd, const char *mode) { + gzFile gz; + int ret; + + gz = malloc(sizeof(struct gzFile_s)); + if (gz == NULL) + return NULL; + gz->write = strchr(mode, 'w') != NULL; + gz->strm.zalloc = myalloc; + gz->strm.zfree = myfree; + gz->strm.opaque = Z_NULL; + if (gz->write) + ret = deflateInit2(&(gz->strm), -1, 8, 15 + 16, 8, 0); + else { + gz->strm.next_in = 0; + gz->strm.avail_in = Z_NULL; + ret = inflateInit2(&(gz->strm), 15 + 16); + } + if (ret != Z_OK) { + free(gz); + return NULL; + } + gz->file = path == NULL ? fdopen(fd, gz->write ? "wb" : "rb") : + fopen(path, gz->write ? "wb" : "rb"); + if (gz->file == NULL) { + gz->write ? 
deflateEnd(&(gz->strm)) : inflateEnd(&(gz->strm)); + free(gz); + return NULL; + } + gz->err = 0; + gz->msg = ""; + return gz; +} + +static gzFile gzopen(const char *path, const char *mode) { + return gz_open(path, -1, mode); +} + +static gzFile gzdopen(int fd, const char *mode) { + return gz_open(NULL, fd, mode); +} + +static int gzwrite(gzFile gz, const void *buf, unsigned len) { + z_stream *strm; + unsigned char out[BUFLEN]; + + if (gz == NULL || !gz->write) + return 0; + strm = &(gz->strm); + strm->next_in = (void *)buf; + strm->avail_in = len; + do { + strm->next_out = out; + strm->avail_out = BUFLEN; + (void)deflate(strm, Z_NO_FLUSH); + fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); + } while (strm->avail_out == 0); + return len; +} + +static int gzread(gzFile gz, void *buf, unsigned len) { + int ret; + unsigned got; + unsigned char in[1]; + z_stream *strm; + + if (gz == NULL || gz->write) + return 0; + if (gz->err) + return 0; + strm = &(gz->strm); + strm->next_out = (void *)buf; + strm->avail_out = len; + do { + got = fread(in, 1, 1, gz->file); + if (got == 0) + break; + strm->next_in = in; + strm->avail_in = 1; + ret = inflate(strm, Z_NO_FLUSH); + if (ret == Z_DATA_ERROR) { + gz->err = Z_DATA_ERROR; + gz->msg = strm->msg; + return 0; + } + if (ret == Z_STREAM_END) + inflateReset(strm); + } while (strm->avail_out); + return len - strm->avail_out; +} + +static int gzclose(gzFile gz) { + z_stream *strm; + unsigned char out[BUFLEN]; + + if (gz == NULL) + return Z_STREAM_ERROR; + strm = &(gz->strm); + if (gz->write) { + strm->next_in = Z_NULL; + strm->avail_in = 0; + do { + strm->next_out = out; + strm->avail_out = BUFLEN; + (void)deflate(strm, Z_FINISH); + fwrite(out, 1, BUFLEN - strm->avail_out, gz->file); + } while (strm->avail_out == 0); + deflateEnd(strm); + } + else + inflateEnd(strm); + fclose(gz->file); + free(gz); + return Z_OK; +} + +static const char *gzerror(gzFile gz, int *err) { + *err = gz->err; + return gz->msg; +} + +#endif + +static char *prog; + +/* =========================================================================== + * Display error message and exit + */ +static void error(const char *msg) { + fprintf(stderr, "%s: %s\n", prog, msg); + exit(1); +} + +#ifdef USE_MMAP /* MMAP version, Miguel Albrecht */ + +/* Try compressing the input file at once using mmap. Return Z_OK if + * success, Z_ERRNO otherwise. + */ +static int gz_compress_mmap(FILE *in, gzFile out) { + int len; + int err; + int ifd = fileno(in); + caddr_t buf; /* mmap'ed buffer for the entire input file */ + off_t buf_len; /* length of the input file */ + struct stat sb; + + /* Determine the size of the file, needed for mmap: */ + if (fstat(ifd, &sb) < 0) return Z_ERRNO; + buf_len = sb.st_size; + if (buf_len <= 0) return Z_ERRNO; + + /* Now do the actual mmap: */ + buf = mmap((caddr_t) 0, buf_len, PROT_READ, MAP_SHARED, ifd, (off_t)0); + if (buf == (caddr_t)(-1)) return Z_ERRNO; + + /* Compress the whole file at once: */ + len = gzwrite(out, (char *)buf, (unsigned)buf_len); + + if (len != (int)buf_len) error(gzerror(out, &err)); + + munmap(buf, buf_len); + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); + return Z_OK; +} +#endif /* USE_MMAP */ + +/* =========================================================================== + * Compress input to output then close both files. + */ + +static void gz_compress(FILE *in, gzFile out) { + local char buf[BUFLEN]; + int len; + int err; + +#ifdef USE_MMAP + /* Try first compressing with mmap. 
If mmap fails (minigzip used in a + * pipe), use the normal fread loop. + */ + if (gz_compress_mmap(in, out) == Z_OK) return; +#endif + for (;;) { + len = (int)fread(buf, 1, sizeof(buf), in); + if (ferror(in)) { + perror("fread"); + exit(1); + } + if (len == 0) break; + + if (gzwrite(out, buf, (unsigned)len) != len) error(gzerror(out, &err)); + } + fclose(in); + if (gzclose(out) != Z_OK) error("failed gzclose"); +} + +/* =========================================================================== + * Uncompress input to output then close both files. + */ +static void gz_uncompress(gzFile in, FILE *out) { + local char buf[BUFLEN]; + int len; + int err; + + for (;;) { + len = gzread(in, buf, sizeof(buf)); + if (len < 0) error (gzerror(in, &err)); + if (len == 0) break; + + if ((int)fwrite(buf, 1, (unsigned)len, out) != len) { + error("failed fwrite"); + } + } + if (fclose(out)) error("failed fclose"); + + if (gzclose(in) != Z_OK) error("failed gzclose"); +} + + +/* =========================================================================== + * Compress the given file: create a corresponding .gz file and remove the + * original. + */ +static void file_compress(char *file, char *mode) { + local char outfile[MAX_NAME_LEN]; + FILE *in; + gzFile out; + + if (strlen(file) + strlen(GZ_SUFFIX) >= sizeof(outfile)) { + fprintf(stderr, "%s: filename too long\n", prog); + exit(1); + } + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(outfile, sizeof(outfile), "%s%s", file, GZ_SUFFIX); +#else + strcpy(outfile, file); + strcat(outfile, GZ_SUFFIX); +#endif + + in = fopen(file, "rb"); + if (in == NULL) { + perror(file); + exit(1); + } + out = gzopen(outfile, mode); + if (out == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, outfile); + exit(1); + } + gz_compress(in, out); + + unlink(file); +} + + +/* =========================================================================== + * Uncompress the given file and remove the original. + */ +static void file_uncompress(char *file) { + local char buf[MAX_NAME_LEN]; + char *infile, *outfile; + FILE *out; + gzFile in; + z_size_t len = strlen(file); + + if (len + strlen(GZ_SUFFIX) >= sizeof(buf)) { + fprintf(stderr, "%s: filename too long\n", prog); + exit(1); + } + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(buf, sizeof(buf), "%s", file); +#else + strcpy(buf, file); +#endif + + if (len > SUFFIX_LEN && strcmp(file+len-SUFFIX_LEN, GZ_SUFFIX) == 0) { + infile = file; + outfile = buf; + outfile[len-3] = '\0'; + } else { + outfile = file; + infile = buf; +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(buf + len, sizeof(buf) - len, "%s", GZ_SUFFIX); +#else + strcat(infile, GZ_SUFFIX); +#endif + } + in = gzopen(infile, "rb"); + if (in == NULL) { + fprintf(stderr, "%s: can't gzopen %s\n", prog, infile); + exit(1); + } + out = fopen(outfile, "wb"); + if (out == NULL) { + perror(file); + exit(1); + } + + gz_uncompress(in, out); + + unlink(infile); +} + + +/* =========================================================================== + * Usage: minigzip [-c] [-d] [-f] [-h] [-r] [-1 to -9] [files...] 
+ * -c : write to standard output + * -d : decompress + * -f : compress with Z_FILTERED + * -h : compress with Z_HUFFMAN_ONLY + * -r : compress with Z_RLE + * -1 to -9 : compression level + */ + +int main(int argc, char *argv[]) { + int copyout = 0; + int uncompr = 0; + gzFile file; + char *bname, outmode[20]; + +#if !defined(NO_snprintf) && !defined(NO_vsnprintf) + snprintf(outmode, sizeof(outmode), "%s", "wb6 "); +#else + strcpy(outmode, "wb6 "); +#endif + + prog = argv[0]; + bname = strrchr(argv[0], '/'); + if (bname) + bname++; + else + bname = argv[0]; + argc--, argv++; + + if (!strcmp(bname, "gunzip")) + uncompr = 1; + else if (!strcmp(bname, "zcat")) + copyout = uncompr = 1; + + while (argc > 0) { + if (strcmp(*argv, "-c") == 0) + copyout = 1; + else if (strcmp(*argv, "-d") == 0) + uncompr = 1; + else if (strcmp(*argv, "-f") == 0) + outmode[3] = 'f'; + else if (strcmp(*argv, "-h") == 0) + outmode[3] = 'h'; + else if (strcmp(*argv, "-r") == 0) + outmode[3] = 'R'; + else if ((*argv)[0] == '-' && (*argv)[1] >= '1' && (*argv)[1] <= '9' && + (*argv)[2] == 0) + outmode[2] = (*argv)[1]; + else + break; + argc--, argv++; + } + if (outmode[3] == ' ') + outmode[3] = 0; + if (argc == 0) { + SET_BINARY_MODE(stdin); + SET_BINARY_MODE(stdout); + if (uncompr) { + file = gzdopen(fileno(stdin), "rb"); + if (file == NULL) error("can't gzdopen stdin"); + gz_uncompress(file, stdout); + } else { + file = gzdopen(fileno(stdout), outmode); + if (file == NULL) error("can't gzdopen stdout"); + gz_compress(stdin, file); + } + } else { + if (copyout) { + SET_BINARY_MODE(stdout); + } + do { + if (uncompr) { + if (copyout) { + file = gzopen(*argv, "rb"); + if (file == NULL) + fprintf(stderr, "%s: can't gzopen %s\n", prog, *argv); + else + gz_uncompress(file, stdout); + } else { + file_uncompress(*argv); + } + } else { + if (copyout) { + FILE * in = fopen(*argv, "rb"); + + if (in == NULL) { + perror(*argv); + } else { + file = gzdopen(fileno(stdout), outmode); + if (file == NULL) error("can't gzdopen stdout"); + + gz_compress(in, file); + } + + } else { + file_compress(*argv, outmode); + } + } + } while (argv++, --argc); + } + return 0; +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/trees.c b/c-blosc/internal-complibs/zlib-1.3.1/trees.c new file mode 100644 index 0000000..6a523ef --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/trees.c @@ -0,0 +1,1117 @@ +/* trees.c -- output deflated data using Huffman coding + * Copyright (C) 1995-2024 Jean-loup Gailly + * detect_data_type() function provided freely by Cosmin Truta, 2006 + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* + * ALGORITHM + * + * The "deflation" process uses several Huffman trees. The more + * common source values are represented by shorter bit sequences. + * + * Each code tree is stored in a compressed form which is itself + * a Huffman encoding of the lengths of all the code strings (in + * ascending order by source values). The actual code strings are + * reconstructed from the lengths in the inflate process, as described + * in the deflate specification. + * + * REFERENCES + * + * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". + * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc + * + * Storer, James A. + * Data Compression: Methods and Theory, pp. 49-50. + * Computer Science Press, 1988. ISBN 0-7167-8156-5. + * + * Sedgewick, R. + * Algorithms, p290. + * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
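+ *
+ *  As a small worked example of that reconstruction (an illustration,
+ *  not part of the references above): given the code lengths
+ *  {2, 1, 3, 3} for four symbols A, B, C, D, canonical (lexicographic)
+ *  assignment yields B=0, A=10, C=110, D=111, so encoder and decoder
+ *  can rebuild identical code strings from the transmitted lengths
+ *  alone.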
+ */
+
+/* @(#) $Id$ */
+
+/* #define GEN_TREES_H */
+
+#include "deflate.h"
+
+#ifdef ZLIB_DEBUG
+# include <ctype.h>
+#endif
+
+/* ===========================================================================
+ * Constants
+ */
+
+#define MAX_BL_BITS 7
+/* Bit length codes must not exceed MAX_BL_BITS bits */
+
+#define END_BLOCK 256
+/* end of block literal code */
+
+#define REP_3_6 16
+/* repeat previous bit length 3-6 times (2 bits of repeat count) */
+
+#define REPZ_3_10 17
+/* repeat a zero length 3-10 times (3 bits of repeat count) */
+
+#define REPZ_11_138 18
+/* repeat a zero length 11-138 times (7 bits of repeat count) */
+
+local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
+   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
+
+local const int extra_dbits[D_CODES] /* extra bits for each distance code */
+   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
+
+local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
+   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
+
+local const uch bl_order[BL_CODES]
+   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
+/* The lengths of the bit length codes are sent in order of decreasing
+ * probability, to avoid transmitting the lengths for unused bit length codes.
+ */
+
+/* ===========================================================================
+ * Local data. These are initialized only once.
+ */
+
+#define DIST_CODE_LEN 512 /* see definition of array dist_code below */
+
+#if defined(GEN_TREES_H) || !defined(STDC)
+/* non ANSI compilers may not accept trees.h */
+
+local ct_data static_ltree[L_CODES+2];
+/* The static literal tree. Since the bit lengths are imposed, there is no
+ * need for the L_CODES extra codes used during heap construction. However
+ * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
+ * below).
+ */
+
+local ct_data static_dtree[D_CODES];
+/* The static distance tree. (Actually a trivial tree since all codes use
+ * 5 bits.)
+ */
+
+uch _dist_code[DIST_CODE_LEN];
+/* Distance codes. The first 256 values correspond to the distances
+ * 3 .. 258, the last 256 values correspond to the top 8 bits of
+ * the 15 bit distances.
+ */
+
+uch _length_code[MAX_MATCH-MIN_MATCH+1];
+/* length code for each normalized match length (0 == MIN_MATCH) */
+
+local int base_length[LENGTH_CODES];
+/* First normalized length for each code (0 = MIN_MATCH) */
+
+local int base_dist[D_CODES];
+/* First normalized distance for each code (0 = distance of 1) */
+
+#else
+# include "trees.h"
+#endif /* GEN_TREES_H */
+
+struct static_tree_desc_s {
+    const ct_data *static_tree; /* static tree or NULL */
+    const intf *extra_bits;     /* extra bits for each code or NULL */
+    int extra_base;             /* base index for extra_bits */
+    int elems;                  /* max number of elements in the tree */
+    int max_length;             /* max bit length for the codes */
+};
+
+#ifdef NO_INIT_GLOBAL_POINTERS
+# define TCONST
+#else
+# define TCONST const
+#endif
+
+local TCONST static_tree_desc static_l_desc =
+{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
+
+local TCONST static_tree_desc static_d_desc =
+{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
+
+local TCONST static_tree_desc static_bl_desc =
+{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
+
+/* ===========================================================================
+ * Output a short LSB first on the stream.
+ * IN assertion: there is enough room in pendingBuf.
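+ * For example, put_short(s, 0x1234) appends the byte 0x34 and then
+ * 0x12 to the pending buffer, matching the LSB-first bit order used
+ * throughout the deflate output.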
+ */ +#define put_short(s, w) { \ + put_byte(s, (uch)((w) & 0xff)); \ + put_byte(s, (uch)((ush)(w) >> 8)); \ +} + +/* =========================================================================== + * Reverse the first len bits of a code, using straightforward code (a faster + * method would use a table) + * IN assertion: 1 <= len <= 15 + */ +local unsigned bi_reverse(unsigned code, int len) { + register unsigned res = 0; + do { + res |= code & 1; + code >>= 1, res <<= 1; + } while (--len > 0); + return res >> 1; +} + +/* =========================================================================== + * Flush the bit buffer, keeping at most 7 bits in it. + */ +local void bi_flush(deflate_state *s) { + if (s->bi_valid == 16) { + put_short(s, s->bi_buf); + s->bi_buf = 0; + s->bi_valid = 0; + } else if (s->bi_valid >= 8) { + put_byte(s, (Byte)s->bi_buf); + s->bi_buf >>= 8; + s->bi_valid -= 8; + } +} + +/* =========================================================================== + * Flush the bit buffer and align the output on a byte boundary + */ +local void bi_windup(deflate_state *s) { + if (s->bi_valid > 8) { + put_short(s, s->bi_buf); + } else if (s->bi_valid > 0) { + put_byte(s, (Byte)s->bi_buf); + } + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->bits_sent = (s->bits_sent + 7) & ~7; +#endif +} + +/* =========================================================================== + * Generate the codes for a given tree and bit counts (which need not be + * optimal). + * IN assertion: the array bl_count contains the bit length statistics for + * the given tree and the field len is set for all tree elements. + * OUT assertion: the field code is set for all tree elements of non + * zero code length. + */ +local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) { + ush next_code[MAX_BITS+1]; /* next code value for each bit length */ + unsigned code = 0; /* running code value */ + int bits; /* bit index */ + int n; /* code index */ + + /* The distribution counts are first used to generate the code values + * without bit reversal. + */ + for (bits = 1; bits <= MAX_BITS; bits++) { + code = (code + bl_count[bits - 1]) << 1; + next_code[bits] = (ush)code; + } + /* Check that the bit counts in bl_count are consistent. The last code + * must be all ones. + */ + Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1, + "inconsistent bit counts"); + Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); + + for (n = 0; n <= max_code; n++) { + int len = tree[n].Len; + if (len == 0) continue; + /* Now reverse the bits */ + tree[n].Code = (ush)bi_reverse(next_code[len]++, len); + + Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", + n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1)); + } +} + +#ifdef GEN_TREES_H +local void gen_trees_header(void); +#endif + +#ifndef ZLIB_DEBUG +# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) + /* Send a code of the given tree. c and tree must not have side effects */ + +#else /* !ZLIB_DEBUG */ +# define send_code(s, c, tree) \ + { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ + send_bits(s, tree[c].Code, tree[c].Len); } +#endif + +/* =========================================================================== + * Send a value on a given number of bits. + * IN assertion: length <= 16 and value fits in length bits. 
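+ * Worked example with Buf_size == 16: if bi_valid is 10 and a 9-bit
+ * value arrives, the low 6 bits of the value complete bi_buf (which is
+ * then flushed with put_short), the high 3 bits start the next buffer,
+ * and bi_valid ends up as 10 + 9 - 16 == 3.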
+ */ +#ifdef ZLIB_DEBUG +local void send_bits(deflate_state *s, int value, int length) { + Tracevv((stderr," l %2d v %4x ", length, value)); + Assert(length > 0 && length <= 15, "invalid length"); + s->bits_sent += (ulg)length; + + /* If not enough room in bi_buf, use (valid) bits from bi_buf and + * (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid)) + * unused bits in value. + */ + if (s->bi_valid > (int)Buf_size - length) { + s->bi_buf |= (ush)value << s->bi_valid; + put_short(s, s->bi_buf); + s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); + s->bi_valid += length - Buf_size; + } else { + s->bi_buf |= (ush)value << s->bi_valid; + s->bi_valid += length; + } +} +#else /* !ZLIB_DEBUG */ + +#define send_bits(s, value, length) \ +{ int len = length;\ + if (s->bi_valid > (int)Buf_size - len) {\ + int val = (int)value;\ + s->bi_buf |= (ush)val << s->bi_valid;\ + put_short(s, s->bi_buf);\ + s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ + s->bi_valid += len - Buf_size;\ + } else {\ + s->bi_buf |= (ush)(value) << s->bi_valid;\ + s->bi_valid += len;\ + }\ +} +#endif /* ZLIB_DEBUG */ + + +/* the arguments must not have side effects */ + +/* =========================================================================== + * Initialize the various 'constant' tables. + */ +local void tr_static_init(void) { +#if defined(GEN_TREES_H) || !defined(STDC) + static int static_init_done = 0; + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + ush bl_count[MAX_BITS+1]; + /* number of codes at each bit length for an optimal tree */ + + if (static_init_done) return; + + /* For some embedded targets, global variables are not initialized: */ +#ifdef NO_INIT_GLOBAL_POINTERS + static_l_desc.static_tree = static_ltree; + static_l_desc.extra_bits = extra_lbits; + static_d_desc.static_tree = static_dtree; + static_d_desc.extra_bits = extra_dbits; + static_bl_desc.extra_bits = extra_blbits; +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES-1; code++) { + base_length[code] = length; + for (n = 0; n < (1 << extra_lbits[code]); n++) { + _length_code[length++] = (uch)code; + } + } + Assert (length == 256, "tr_static_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + _length_code[length - 1] = (uch)code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0 ; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1 << extra_dbits[code]); n++) { + _dist_code[dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for ( ; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { + _dist_code[256 + dist++] = (uch)code; + } + } + Assert (dist == 256, "tr_static_init: 256 + dist != 512"); + + /* Construct the codes of the static literal tree */ + for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; + n = 0; + while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; + while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; + while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; + while (n <= 287) static_ltree[n++].Len = 8, 
bl_count[8]++;
+    /* Codes 286 and 287 do not exist, but we must include them in the
+     * tree construction to get a canonical Huffman tree (longest code
+     * all ones)
+     */
+    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
+
+    /* The static distance tree is trivial: */
+    for (n = 0; n < D_CODES; n++) {
+        static_dtree[n].Len = 5;
+        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
+    }
+    static_init_done = 1;
+
+# ifdef GEN_TREES_H
+    gen_trees_header();
+# endif
+#endif /* defined(GEN_TREES_H) || !defined(STDC) */
+}
+
+/* ===========================================================================
+ * Generate the file trees.h describing the static trees.
+ */
+#ifdef GEN_TREES_H
+# ifndef ZLIB_DEBUG
+# include <stdio.h>
+# endif
+
+# define SEPARATOR(i, last, width) \
+      ((i) == (last)? "\n};\n\n" : \
+      ((i) % (width) == (width) - 1 ? ",\n" : ", "))
+
+void gen_trees_header(void) {
+    FILE *header = fopen("trees.h", "w");
+    int i;
+
+    Assert (header != NULL, "Can't open trees.h");
+    fprintf(header,
+            "/* header created automatically with -DGEN_TREES_H */\n\n");
+
+    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
+    for (i = 0; i < L_CODES+2; i++) {
+        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
+                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
+    }
+
+    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
+                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
+    }
+
+    fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
+    for (i = 0; i < DIST_CODE_LEN; i++) {
+        fprintf(header, "%2u%s", _dist_code[i],
+                SEPARATOR(i, DIST_CODE_LEN-1, 20));
+    }
+
+    fprintf(header,
+        "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
+    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
+        fprintf(header, "%2u%s", _length_code[i],
+                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
+    }
+
+    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
+    for (i = 0; i < LENGTH_CODES; i++) {
+        fprintf(header, "%1u%s", base_length[i],
+                SEPARATOR(i, LENGTH_CODES-1, 20));
+    }
+
+    fprintf(header, "local const int base_dist[D_CODES] = {\n");
+    for (i = 0; i < D_CODES; i++) {
+        fprintf(header, "%5u%s", base_dist[i],
+                SEPARATOR(i, D_CODES-1, 10));
+    }
+
+    fclose(header);
+}
+#endif /* GEN_TREES_H */
+
+/* ===========================================================================
+ * Initialize a new block.
+ */
+local void init_block(deflate_state *s) {
+    int n; /* iterates over tree elements */
+
+    /* Initialize the trees. */
+    for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
+    for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
+    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
+
+    s->dyn_ltree[END_BLOCK].Freq = 1;
+    s->opt_len = s->static_len = 0L;
+    s->sym_next = s->matches = 0;
+}
+
+/* ===========================================================================
+ * Initialize the tree data structures for a new zlib stream.
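+ *
+ * This is not called directly by applications; a minimal sketch of the
+ * public-API path that reaches it (assuming "out" is a caller-supplied
+ * Bytef buffer of size out_size):
+ *
+ *     z_stream strm;
+ *     strm.zalloc = Z_NULL;
+ *     strm.zfree  = Z_NULL;
+ *     strm.opaque = Z_NULL;
+ *     deflateInit(&strm, Z_DEFAULT_COMPRESSION);  (runs _tr_init via reset)
+ *     strm.next_in = (z_const Bytef *)"hello";
+ *     strm.avail_in = 5;
+ *     strm.next_out = out;
+ *     strm.avail_out = out_size;
+ *     deflate(&strm, Z_FINISH);
+ *     deflateEnd(&strm);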
+ */ +void ZLIB_INTERNAL _tr_init(deflate_state *s) { + tr_static_init(); + + s->l_desc.dyn_tree = s->dyn_ltree; + s->l_desc.stat_desc = &static_l_desc; + + s->d_desc.dyn_tree = s->dyn_dtree; + s->d_desc.stat_desc = &static_d_desc; + + s->bl_desc.dyn_tree = s->bl_tree; + s->bl_desc.stat_desc = &static_bl_desc; + + s->bi_buf = 0; + s->bi_valid = 0; +#ifdef ZLIB_DEBUG + s->compressed_len = 0L; + s->bits_sent = 0L; +#endif + + /* Initialize the first block of the first file: */ + init_block(s); +} + +#define SMALLEST 1 +/* Index within the heap array of least frequent node in the Huffman tree */ + + +/* =========================================================================== + * Remove the smallest element from the heap and recreate the heap with + * one less element. Updates heap and heap_len. + */ +#define pqremove(s, tree, top) \ +{\ + top = s->heap[SMALLEST]; \ + s->heap[SMALLEST] = s->heap[s->heap_len--]; \ + pqdownheap(s, tree, SMALLEST); \ +} + +/* =========================================================================== + * Compares to subtrees, using the tree depth as tie breaker when + * the subtrees have equal frequency. This minimizes the worst case length. + */ +#define smaller(tree, n, m, depth) \ + (tree[n].Freq < tree[m].Freq || \ + (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) + +/* =========================================================================== + * Restore the heap property by moving down the tree starting at node k, + * exchanging a node with the smallest of its two sons if necessary, stopping + * when the heap property is re-established (each father smaller than its + * two sons). + */ +local void pqdownheap(deflate_state *s, ct_data *tree, int k) { + int v = s->heap[k]; + int j = k << 1; /* left son of k */ + while (j <= s->heap_len) { + /* Set j to the smallest of the two sons: */ + if (j < s->heap_len && + smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) { + j++; + } + /* Exit if v is smaller than both sons */ + if (smaller(tree, v, s->heap[j], s->depth)) break; + + /* Exchange v with the smallest son */ + s->heap[k] = s->heap[j]; k = j; + + /* And continue down the tree, setting j to the left son of k */ + j <<= 1; + } + s->heap[k] = v; +} + +/* =========================================================================== + * Compute the optimal bit lengths for a tree and update the total bit length + * for the current block. + * IN assertion: the fields freq and dad are set, heap[heap_max] and + * above are the tree nodes sorted by increasing frequency. + * OUT assertions: the field len is set to the optimal bit length, the + * array bl_count contains the frequencies for each bit length. + * The length opt_len is updated; static_len is also updated if stree is + * not null. + */ +local void gen_bitlen(deflate_state *s, tree_desc *desc) { + ct_data *tree = desc->dyn_tree; + int max_code = desc->max_code; + const ct_data *stree = desc->stat_desc->static_tree; + const intf *extra = desc->stat_desc->extra_bits; + int base = desc->stat_desc->extra_base; + int max_length = desc->stat_desc->max_length; + int h; /* heap index */ + int n, m; /* iterate over the tree elements */ + int bits; /* bit length */ + int xbits; /* extra bits */ + ush f; /* frequency */ + int overflow = 0; /* number of elements with bit length too large */ + + for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; + + /* In a first pass, compute the optimal bit lengths (which may + * overflow in the case of the bit length tree). 
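+ * When it does overflow, the fix-up loop further below restores the
+ * Kraft equality (a full binary tree needs the sum of 2^-len over all
+ * leaves to come back to 1): each iteration moves one leaf from a depth
+ * b < max_length down to b+1 and pulls one overflowed leaf up beside it,
+ * a net change of -2^-b + 2*2^-(b+1) - 2^-max_length = -2^-max_length.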
+ */
+    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
+
+    for (h = s->heap_max + 1; h < HEAP_SIZE; h++) {
+        n = s->heap[h];
+        bits = tree[tree[n].Dad].Len + 1;
+        if (bits > max_length) bits = max_length, overflow++;
+        tree[n].Len = (ush)bits;
+        /* We overwrite tree[n].Dad which is no longer needed */
+
+        if (n > max_code) continue; /* not a leaf node */
+
+        s->bl_count[bits]++;
+        xbits = 0;
+        if (n >= base) xbits = extra[n - base];
+        f = tree[n].Freq;
+        s->opt_len += (ulg)f * (unsigned)(bits + xbits);
+        if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
+    }
+    if (overflow == 0) return;
+
+    Tracev((stderr,"\nbit length overflow\n"));
+    /* This happens for example on obj2 and pic of the Calgary corpus */
+
+    /* Find the first bit length which could increase: */
+    do {
+        bits = max_length - 1;
+        while (s->bl_count[bits] == 0) bits--;
+        s->bl_count[bits]--;        /* move one leaf down the tree */
+        s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */
+        s->bl_count[max_length]--;
+        /* The brother of the overflow item also moves one step up,
+         * but this does not affect bl_count[max_length]
+         */
+        overflow -= 2;
+    } while (overflow > 0);
+
+    /* Now recompute all bit lengths, scanning in increasing frequency.
+     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
+     * lengths instead of fixing only the wrong ones. This idea is taken
+     * from 'ar' written by Haruhiko Okumura.)
+     */
+    for (bits = max_length; bits != 0; bits--) {
+        n = s->bl_count[bits];
+        while (n != 0) {
+            m = s->heap[--h];
+            if (m > max_code) continue;
+            if ((unsigned) tree[m].Len != (unsigned) bits) {
+                Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
+                tree[m].Len = (ush)bits;
+            }
+            n--;
+        }
+    }
+}
+
+#ifdef DUMP_BL_TREE
+# include <stdio.h>
+#endif
+
+/* ===========================================================================
+ * Construct one Huffman tree and assigns the code bit strings and lengths.
+ * Update the total bit length for the current block.
+ * IN assertion: the field freq is set for all tree elements.
+ * OUT assertions: the fields len and code are set to the optimal bit length
+ *     and corresponding code. The length opt_len is updated; static_len is
+ *     also updated if stree is not null. The field max_code is set.
+ */
+local void build_tree(deflate_state *s, tree_desc *desc) {
+    ct_data *tree = desc->dyn_tree;
+    const ct_data *stree = desc->stat_desc->static_tree;
+    int elems = desc->stat_desc->elems;
+    int n, m;          /* iterate over heap elements */
+    int max_code = -1; /* largest code with non zero frequency */
+    int node;          /* new node being created */
+
+    /* Construct the initial heap, with least frequent element in
+     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n + 1].
+     * heap[0] is not used.
+     */
+    s->heap_len = 0, s->heap_max = HEAP_SIZE;
+
+    for (n = 0; n < elems; n++) {
+        if (tree[n].Freq != 0) {
+            s->heap[++(s->heap_len)] = max_code = n;
+            s->depth[n] = 0;
+        } else {
+            tree[n].Len = 0;
+        }
+    }
+
+    /* The pkzip format requires that at least one distance code exists,
+     * and that at least one bit should be sent even if there is only one
+     * possible code. So to avoid special checks later on we force at least
+     * two codes of non zero frequency.
+     */
+    while (s->heap_len < 2) {
+        node = s->heap[++(s->heap_len)] = (max_code < 2 ?
++max_code : 0); + tree[node].Freq = 1; + s->depth[node] = 0; + s->opt_len--; if (stree) s->static_len -= stree[node].Len; + /* node is 0 or 1 so it does not have extra bits */ + } + desc->max_code = max_code; + + /* The elements heap[heap_len/2 + 1 .. heap_len] are leaves of the tree, + * establish sub-heaps of increasing lengths: + */ + for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); + + /* Construct the Huffman tree by repeatedly combining the least two + * frequent nodes. + */ + node = elems; /* next internal node of the tree */ + do { + pqremove(s, tree, n); /* n = node of least frequency */ + m = s->heap[SMALLEST]; /* m = node of next least frequency */ + + s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ + s->heap[--(s->heap_max)] = m; + + /* Create a new node father of n and m */ + tree[node].Freq = tree[n].Freq + tree[m].Freq; + s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? + s->depth[n] : s->depth[m]) + 1); + tree[n].Dad = tree[m].Dad = (ush)node; +#ifdef DUMP_BL_TREE + if (tree == s->bl_tree) { + fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", + node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); + } +#endif + /* and insert the new node in the heap */ + s->heap[SMALLEST] = node++; + pqdownheap(s, tree, SMALLEST); + + } while (s->heap_len >= 2); + + s->heap[--(s->heap_max)] = s->heap[SMALLEST]; + + /* At this point, the fields freq and dad are set. We can now + * generate the bit lengths. + */ + gen_bitlen(s, (tree_desc *)desc); + + /* The field len is now set, we can generate the bit codes */ + gen_codes ((ct_data *)tree, max_code, s->bl_count); +} + +/* =========================================================================== + * Scan a literal or distance tree to determine the frequencies of the codes + * in the bit length tree. + */ +local void scan_tree(deflate_state *s, ct_data *tree, int max_code) { + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + if (nextlen == 0) max_count = 138, min_count = 3; + tree[max_code + 1].Len = (ush)0xffff; /* guard */ + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n + 1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + s->bl_tree[curlen].Freq += count; + } else if (curlen != 0) { + if (curlen != prevlen) s->bl_tree[curlen].Freq++; + s->bl_tree[REP_3_6].Freq++; + } else if (count <= 10) { + s->bl_tree[REPZ_3_10].Freq++; + } else { + s->bl_tree[REPZ_11_138].Freq++; + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Send a literal or distance tree in compressed form, using the codes in + * bl_tree. 
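+ *
+ * For example, a run of five 8s followed by four 0s in the length
+ * array is sent as: the bit-length code for 8 once, then REP_3_6 with
+ * two extra bits holding 4-3 = 1 (repeat the 8 four more times), then
+ * REPZ_3_10 with three extra bits holding 4-3 = 1 (a run of four
+ * zeros).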
+ */ +local void send_tree(deflate_state *s, ct_data *tree, int max_code) { + int n; /* iterates over all tree elements */ + int prevlen = -1; /* last emitted length */ + int curlen; /* length of current code */ + int nextlen = tree[0].Len; /* length of next code */ + int count = 0; /* repeat count of the current code */ + int max_count = 7; /* max repeat count */ + int min_count = 4; /* min repeat count */ + + /* tree[max_code + 1].Len = -1; */ /* guard already set */ + if (nextlen == 0) max_count = 138, min_count = 3; + + for (n = 0; n <= max_code; n++) { + curlen = nextlen; nextlen = tree[n + 1].Len; + if (++count < max_count && curlen == nextlen) { + continue; + } else if (count < min_count) { + do { send_code(s, curlen, s->bl_tree); } while (--count != 0); + + } else if (curlen != 0) { + if (curlen != prevlen) { + send_code(s, curlen, s->bl_tree); count--; + } + Assert(count >= 3 && count <= 6, " 3_6?"); + send_code(s, REP_3_6, s->bl_tree); send_bits(s, count - 3, 2); + + } else if (count <= 10) { + send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count - 3, 3); + + } else { + send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count - 11, 7); + } + count = 0; prevlen = curlen; + if (nextlen == 0) { + max_count = 138, min_count = 3; + } else if (curlen == nextlen) { + max_count = 6, min_count = 3; + } else { + max_count = 7, min_count = 4; + } + } +} + +/* =========================================================================== + * Construct the Huffman tree for the bit lengths and return the index in + * bl_order of the last bit length code to send. + */ +local int build_bl_tree(deflate_state *s) { + int max_blindex; /* index of last bit length code of non zero freq */ + + /* Determine the bit length frequencies for literal and distance trees */ + scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); + scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); + + /* Build the bit length tree: */ + build_tree(s, (tree_desc *)(&(s->bl_desc))); + /* opt_len now includes the length of the tree representations, except the + * lengths of the bit lengths codes and the 5 + 5 + 4 bits for the counts. + */ + + /* Determine the number of bit length codes to send. The pkzip format + * requires that at least 4 bit length codes be sent. (appnote.txt says + * 3 but the actual value used is 4.) + */ + for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { + if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; + } + /* Update opt_len to include the bit length tree and counts */ + s->opt_len += 3*((ulg)max_blindex + 1) + 5 + 5 + 4; + Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", + s->opt_len, s->static_len)); + + return max_blindex; +} + +/* =========================================================================== + * Send the header for a block using dynamic Huffman trees: the counts, the + * lengths of the bit length codes, the literal tree and the distance tree. + * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 
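+ *
+ * The three counts are packed first as fixed-width fields (lcodes-257
+ * in 5 bits, dcodes-1 in 5 bits, blcodes-4 in 4 bits), then blcodes
+ * 3-bit code lengths follow in bl_order; e.g. lcodes=288, dcodes=30,
+ * blcodes=19 is transmitted as the values 31, 29 and 15.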
+ */ +local void send_all_trees(deflate_state *s, int lcodes, int dcodes, + int blcodes) { + int rank; /* index in bl_order */ + + Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); + Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, + "too many codes"); + Tracev((stderr, "\nbl counts: ")); + send_bits(s, lcodes - 257, 5); /* not +255 as stated in appnote.txt */ + send_bits(s, dcodes - 1, 5); + send_bits(s, blcodes - 4, 4); /* not -3 as stated in appnote.txt */ + for (rank = 0; rank < blcodes; rank++) { + Tracev((stderr, "\nbl code %2d ", bl_order[rank])); + send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); + } + Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1); /* literal tree */ + Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); + + send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1); /* distance tree */ + Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); +} + +/* =========================================================================== + * Send a stored block + */ +void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, + ulg stored_len, int last) { + send_bits(s, (STORED_BLOCK<<1) + last, 3); /* send block type */ + bi_windup(s); /* align on byte boundary */ + put_short(s, (ush)stored_len); + put_short(s, (ush)~stored_len); + if (stored_len) + zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); + s->pending += stored_len; +#ifdef ZLIB_DEBUG + s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; + s->compressed_len += (stored_len + 4) << 3; + s->bits_sent += 2*16; + s->bits_sent += stored_len << 3; +#endif +} + +/* =========================================================================== + * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) + */ +void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) { + bi_flush(s); +} + +/* =========================================================================== + * Send one empty static block to give enough lookahead for inflate. + * This takes 10 bits, of which 7 may remain in the bit buffer. 
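+ * The accounting: 3 bits for the STATIC_TREES block header plus the
+ * 7-bit static code for END_BLOCK gives 10 bits, and the bi_flush()
+ * call can leave up to 7 of them sitting in bi_buf.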
+ */ +void ZLIB_INTERNAL _tr_align(deflate_state *s) { + send_bits(s, STATIC_TREES<<1, 3); + send_code(s, END_BLOCK, static_ltree); +#ifdef ZLIB_DEBUG + s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ +#endif + bi_flush(s); +} + +/* =========================================================================== + * Send the block data compressed using the given Huffman trees + */ +local void compress_block(deflate_state *s, const ct_data *ltree, + const ct_data *dtree) { + unsigned dist; /* distance of matched string */ + int lc; /* match length or unmatched char (if dist == 0) */ + unsigned sx = 0; /* running index in symbol buffers */ + unsigned code; /* the code to send */ + int extra; /* number of extra bits to send */ + + if (s->sym_next != 0) do { +#ifdef LIT_MEM + dist = s->d_buf[sx]; + lc = s->l_buf[sx++]; +#else + dist = s->sym_buf[sx++] & 0xff; + dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; + lc = s->sym_buf[sx++]; +#endif + if (dist == 0) { + send_code(s, lc, ltree); /* send a literal byte */ + Tracecv(isgraph(lc), (stderr," '%c' ", lc)); + } else { + /* Here, lc is the match length - MIN_MATCH */ + code = _length_code[lc]; + send_code(s, code + LITERALS + 1, ltree); /* send length code */ + extra = extra_lbits[code]; + if (extra != 0) { + lc -= base_length[code]; + send_bits(s, lc, extra); /* send the extra length bits */ + } + dist--; /* dist is now the match distance - 1 */ + code = d_code(dist); + Assert (code < D_CODES, "bad d_code"); + + send_code(s, code, dtree); /* send the distance code */ + extra = extra_dbits[code]; + if (extra != 0) { + dist -= (unsigned)base_dist[code]; + send_bits(s, dist, extra); /* send the extra distance bits */ + } + } /* literal or match pair ? */ + + /* Check for no overlay of pending_buf on needed symbols */ +#ifdef LIT_MEM + Assert(s->pending < 2 * (s->lit_bufsize + sx), "pendingBuf overflow"); +#else + Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); +#endif + + } while (sx < s->sym_next); + + send_code(s, END_BLOCK, ltree); +} + +/* =========================================================================== + * Check if the data type is TEXT or BINARY, using the following algorithm: + * - TEXT if the two conditions below are satisfied: + * a) There are no non-portable control characters belonging to the + * "block list" (0..6, 14..25, 28..31). + * b) There is at least one printable character belonging to the + * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). + * - BINARY otherwise. + * - The following partially-portable control characters form a + * "gray list" that is ignored in this detection algorithm: + * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). + * IN assertion: the fields Freq of dyn_ltree are set. + */ +local int detect_data_type(deflate_state *s) { + /* block_mask is the bit mask of block-listed bytes + * set bits 0..6, 14..25, and 28..31 + * 0xf3ffc07f = binary 11110011111111111100000001111111 + */ + unsigned long block_mask = 0xf3ffc07fUL; + int n; + + /* Check for non-textual ("block-listed") bytes. */ + for (n = 0; n <= 31; n++, block_mask >>= 1) + if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) + return Z_BINARY; + + /* Check for textual ("allow-listed") bytes. 
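+     * (For example, a buffer of TAB, LF and ASCII letters reaches this
+     * point and is classified Z_TEXT, while a single NUL byte would
+     * already have returned Z_BINARY above.)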
*/ + if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 + || s->dyn_ltree[13].Freq != 0) + return Z_TEXT; + for (n = 32; n < LITERALS; n++) + if (s->dyn_ltree[n].Freq != 0) + return Z_TEXT; + + /* There are no "block-listed" or "allow-listed" bytes: + * this stream either is empty or has tolerated ("gray-listed") bytes only. + */ + return Z_BINARY; +} + +/* =========================================================================== + * Determine the best encoding for the current block: dynamic trees, static + * trees or store, and write out the encoded block. + */ +void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, + ulg stored_len, int last) { + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex = 0; /* index of last bit length code of non zero freq */ + + /* Build the Huffman trees unless a stored block is forced */ + if (s->level > 0) { + + /* Check if the file is binary or text */ + if (s->strm->data_type == Z_UNKNOWN) + s->strm->data_type = detect_data_type(s); + + /* Construct the literal and distance trees */ + build_tree(s, (tree_desc *)(&(s->l_desc))); + Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + + build_tree(s, (tree_desc *)(&(s->d_desc))); + Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, + s->static_len)); + /* At this point, opt_len and static_len are the total bit lengths of + * the compressed block data, excluding the tree representations. + */ + + /* Build the bit length tree for the above two trees, and get the index + * in bl_order of the last bit length code to send. + */ + max_blindex = build_bl_tree(s); + + /* Determine the best encoding. Compute the block lengths in bytes. */ + opt_lenb = (s->opt_len + 3 + 7) >> 3; + static_lenb = (s->static_len + 3 + 7) >> 3; + + Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", + opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, + s->sym_next / 3)); + +#ifndef FORCE_STATIC + if (static_lenb <= opt_lenb || s->strategy == Z_FIXED) +#endif + opt_lenb = static_lenb; + + } else { + Assert(buf != (char*)0, "lost buf"); + opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ + } + +#ifdef FORCE_STORED + if (buf != (char*)0) { /* force stored block */ +#else + if (stored_len + 4 <= opt_lenb && buf != (char*)0) { + /* 4: two words for the lengths */ +#endif + /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. + * Otherwise we can't have processed more than WSIZE input bytes since + * the last block flush, because compression would have been + * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to + * transform a block into a stored block. + */ + _tr_stored_block(s, buf, stored_len, last); + + } else if (static_lenb == opt_lenb) { + send_bits(s, (STATIC_TREES<<1) + last, 3); + compress_block(s, (const ct_data *)static_ltree, + (const ct_data *)static_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->static_len; +#endif + } else { + send_bits(s, (DYN_TREES<<1) + last, 3); + send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1, + max_blindex + 1); + compress_block(s, (const ct_data *)s->dyn_ltree, + (const ct_data *)s->dyn_dtree); +#ifdef ZLIB_DEBUG + s->compressed_len += 3 + s->opt_len; +#endif + } + Assert (s->compressed_len == s->bits_sent, "bad compressed size"); + /* The above check is made mod 2^32, for files larger than 512 MB + * and uLong implemented on 32 bits. 
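+ * (2^32 bits is 2^29 bytes = 512 MB, the point at which a 32-bit bit
+ * counter first wraps.)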
+ */ + init_block(s); + + if (last) { + bi_windup(s); +#ifdef ZLIB_DEBUG + s->compressed_len += 7; /* align on byte boundary */ +#endif + } + Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len >> 3, + s->compressed_len - 7*last)); +} + +/* =========================================================================== + * Save the match info and tally the frequency counts. Return true if + * the current block must be flushed. + */ +int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { +#ifdef LIT_MEM + s->d_buf[s->sym_next] = (ush)dist; + s->l_buf[s->sym_next++] = (uch)lc; +#else + s->sym_buf[s->sym_next++] = (uch)dist; + s->sym_buf[s->sym_next++] = (uch)(dist >> 8); + s->sym_buf[s->sym_next++] = (uch)lc; +#endif + if (dist == 0) { + /* lc is the unmatched char */ + s->dyn_ltree[lc].Freq++; + } else { + s->matches++; + /* Here, lc is the match length - MIN_MATCH */ + dist--; /* dist = match distance - 1 */ + Assert((ush)dist < (ush)MAX_DIST(s) && + (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && + (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); + + s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++; + s->dyn_dtree[d_code(dist)].Freq++; + } + return (s->sym_next == s->sym_end); +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/trees.h b/c-blosc/internal-complibs/zlib-1.3.1/trees.h new file mode 100644 index 0000000..d35639d --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/trees.h @@ -0,0 +1,128 @@ +/* header created automatically with -DGEN_TREES_H */ + +local const ct_data static_ltree[L_CODES+2] = { +{{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, +{{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, +{{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, +{{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, +{{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, +{{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, +{{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, +{{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, +{{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, +{{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, +{{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, +{{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, +{{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, +{{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, +{{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, +{{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, +{{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, +{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, +{{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, +{{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, +{{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, +{{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, +{{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, +{{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, +{{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, +{{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, +{{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, +{{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, +{{ 
61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, +{{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, +{{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, +{{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, +{{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, +{{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, +{{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, +{{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, +{{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, +{{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, +{{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, +{{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, +{{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, +{{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, +{{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, +{{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, +{{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, +{{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, +{{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, +{{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, +{{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, +{{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, +{{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, +{{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, +{{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, +{{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, +{{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, +{{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, +{{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, +{{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} +}; + +local const ct_data static_dtree[D_CODES] = { +{{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, +{{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, +{{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, +{{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, +{{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, +{{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} +}; + +const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { + 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, + 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, +10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, +11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, +12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, +13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, +14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, +15, 15, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, +18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, +28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, +29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 +}; + +const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, +13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, +17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, +19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, +21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, +22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, +23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, +25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, +26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, +27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 +}; + +local const int base_length[LENGTH_CODES] = { +0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, +64, 80, 96, 112, 128, 160, 192, 224, 0 +}; + +local const int base_dist[D_CODES] = { + 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, + 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, + 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 +}; + diff --git a/c-blosc/internal-complibs/zlib-1.3.1/uncompr.c b/c-blosc/internal-complibs/zlib-1.3.1/uncompr.c new file mode 100644 index 0000000..5e25666 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/uncompr.c @@ -0,0 +1,85 @@ +/* uncompr.c -- decompress a memory buffer + * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#define ZLIB_INTERNAL +#include "zlib.h" + +/* =========================================================================== + Decompresses the source buffer into the destination buffer. *sourceLen is + the byte length of the source buffer. Upon entry, *destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. 
(The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, + *destLen is the size of the decompressed data and *sourceLen is the number + of source bytes consumed. Upon return, source + *sourceLen points to the + first unused input byte. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, or + Z_DATA_ERROR if the input data was corrupted, including if the input data is + an incomplete zlib stream. +*/ +int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong *sourceLen) { + z_stream stream; + int err; + const uInt max = (uInt)-1; + uLong len, left; + Byte buf[1]; /* for detection of incomplete stream when *destLen == 0 */ + + len = *sourceLen; + if (*destLen) { + left = *destLen; + *destLen = 0; + } + else { + left = 1; + dest = buf; + } + + stream.next_in = (z_const Bytef *)source; + stream.avail_in = 0; + stream.zalloc = (alloc_func)0; + stream.zfree = (free_func)0; + stream.opaque = (voidpf)0; + + err = inflateInit(&stream); + if (err != Z_OK) return err; + + stream.next_out = dest; + stream.avail_out = 0; + + do { + if (stream.avail_out == 0) { + stream.avail_out = left > (uLong)max ? max : (uInt)left; + left -= stream.avail_out; + } + if (stream.avail_in == 0) { + stream.avail_in = len > (uLong)max ? max : (uInt)len; + len -= stream.avail_in; + } + err = inflate(&stream, Z_NO_FLUSH); + } while (err == Z_OK); + + *sourceLen -= len + stream.avail_in; + if (dest != buf) + *destLen = stream.total_out; + else if (stream.total_out && err == Z_BUF_ERROR) + left = 1; + + inflateEnd(&stream); + return err == Z_STREAM_END ? Z_OK : + err == Z_NEED_DICT ? Z_DATA_ERROR : + err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR : + err; +} + +int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, const Bytef *source, + uLong sourceLen) { + return uncompress2(dest, destLen, source, &sourceLen); +} diff --git a/c-blosc/internal-complibs/zlib-1.3.1/zconf.h b/c-blosc/internal-complibs/zlib-1.3.1/zconf.h new file mode 100644 index 0000000..62adc8d --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/zconf.h @@ -0,0 +1,543 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". 
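+ *
+ * For example, with -DZ_PREFIX a call written as
+ *     deflate(&strm, Z_FINISH);
+ * compiles and links as z_deflate(&strm, Z_FINISH), and the gzFile
+ * type below likewise becomes z_gzFile.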
+ */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_combine_gen z_crc32_combine_gen +# define crc32_combine_gen64 z_crc32_combine_gen64 +# define crc32_combine_op z_crc32_combine_op +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# 
define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). 
*/
+# define STDC
+#endif
+
+#ifndef STDC
+# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */
+# define const /* note: need a more gentle solution here */
+# endif
+#endif
+
+#if defined(ZLIB_CONST) && !defined(z_const)
+# define z_const const
+#else
+# define z_const
+#endif
+
+#ifdef Z_SOLO
+# ifdef _WIN64
+ typedef unsigned long long z_size_t;
+# else
+ typedef unsigned long z_size_t;
+# endif
+#else
+# define z_longlong long long
+# if defined(NO_SIZE_T)
+ typedef unsigned NO_SIZE_T z_size_t;
+# elif defined(STDC)
+# include <stddef.h>
+ typedef size_t z_size_t;
+# else
+ typedef unsigned long z_size_t;
+# endif
+# undef z_longlong
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+# ifdef MAXSEG_64K
+# define MAX_MEM_LEVEL 8
+# else
+# define MAX_MEM_LEVEL 9
+# endif
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+# define MAX_WBITS 15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+ (1 << (windowBits+2)) + (1 << (memLevel+9))
+ that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+ make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+ The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
+ for small objects.
+*/
+
+ /* Type declarations */
+
+#ifndef OF /* function prototypes */
+# ifdef STDC
+# define OF(args) args
+# else
+# define OF(args) ()
+# endif
+#endif
+
+/* The following definitions for FAR are needed only for MSDOS mixed
+ * model programming (small or medium model with some far allocations).
+ * This was tested only with MSC; for other MSDOS compilers you may have
+ * to define NO_MEMCPY in zutil.h. If you don't need the mixed model,
+ * just define FAR to be empty.
+ */
+#ifdef SYS16BIT
+# if defined(M_I86SM) || defined(M_I86MM)
+ /* MSC small or medium model */
+# define SMALL_MEDIUM
+# ifdef _MSC_VER
+# define FAR _far
+# else
+# define FAR far
+# endif
+# endif
+# if (defined(__SMALL__) || defined(__MEDIUM__))
+ /* Turbo C small or medium model */
+# define SMALL_MEDIUM
+# ifdef __BORLANDC__
+# define FAR _far
+# else
+# define FAR far
+# endif
+# endif
+#endif
+
+#if defined(WINDOWS) || defined(WIN32)
+ /* If building or using zlib as a DLL, define ZLIB_DLL.
+ * This is not mandatory, but it offers a little performance increase.
+ */
+# ifdef ZLIB_DLL
+# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500))
+# ifdef ZLIB_INTERNAL
+# define ZEXTERN extern __declspec(dllexport)
+# else
+# define ZEXTERN extern __declspec(dllimport)
+# endif
+# endif
+# endif /* ZLIB_DLL */
+ /* If building or using zlib with the WINAPI/WINAPIV calling convention,
+ * define ZLIB_WINAPI.
+ * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI.
+ */
+# ifdef ZLIB_WINAPI
+# ifdef FAR
+# undef FAR
+# endif
+# ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+# endif
+# include <windows.h>
+ /* No need for _export, use ZLIB.DEF instead. */
+ /* For complete Windows compatibility, use WINAPI, not __stdcall.
*/
+# define ZEXPORT WINAPI
+# ifdef WIN32
+# define ZEXPORTVA WINAPIV
+# else
+# define ZEXPORTVA FAR CDECL
+# endif
+# endif
+#endif
+
+#if defined (__BEOS__)
+# ifdef ZLIB_DLL
+# ifdef ZLIB_INTERNAL
+# define ZEXPORT __declspec(dllexport)
+# define ZEXPORTVA __declspec(dllexport)
+# else
+# define ZEXPORT __declspec(dllimport)
+# define ZEXPORTVA __declspec(dllimport)
+# endif
+# endif
+#endif
+
+#ifndef ZEXTERN
+# define ZEXTERN extern
+#endif
+#ifndef ZEXPORT
+# define ZEXPORT
+#endif
+#ifndef ZEXPORTVA
+# define ZEXPORTVA
+#endif
+
+#ifndef FAR
+# define FAR
+#endif
+
+#if !defined(__MACTYPES__)
+typedef unsigned char Byte; /* 8 bits */
+#endif
+typedef unsigned int uInt; /* 16 bits or more */
+typedef unsigned long uLong; /* 32 bits or more */
+
+#ifdef SMALL_MEDIUM
+ /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */
+# define Bytef Byte FAR
+#else
+ typedef Byte FAR Bytef;
+#endif
+typedef char FAR charf;
+typedef int FAR intf;
+typedef uInt FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+ typedef void const *voidpc;
+ typedef void FAR *voidpf;
+ typedef void *voidp;
+#else
+ typedef Byte const *voidpc;
+ typedef Byte FAR *voidpf;
+ typedef Byte *voidp;
+#endif
+
+#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC)
+# include <limits.h>
+# if (UINT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned
+# elif (ULONG_MAX == 0xffffffffUL)
+# define Z_U4 unsigned long
+# elif (USHRT_MAX == 0xffffffffUL)
+# define Z_U4 unsigned short
+# endif
+#endif
+
+#ifdef Z_U4
+ typedef Z_U4 z_crc_t;
+#else
+ typedef unsigned long z_crc_t;
+#endif
+
+#ifdef HAVE_UNISTD_H /* may be set to #if 1 by ./configure */
+# define Z_HAVE_UNISTD_H
+#endif
+
+#ifdef HAVE_STDARG_H /* may be set to #if 1 by ./configure */
+# define Z_HAVE_STDARG_H
+#endif
+
+#ifdef STDC
+# ifndef Z_SOLO
+# include <sys/types.h> /* for off_t */
+# endif
+#endif
+
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+# ifndef Z_SOLO
+# include <stdarg.h> /* for va_list */
+# endif
+#endif
+
+#ifdef _WIN32
+# ifndef Z_SOLO
+# include <stddef.h> /* for wchar_t */
+# endif
+#endif
+
+/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and
+ * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even
+ * though the former does not conform to the LFS document), but considering
+ * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as
+ * equivalently requesting no 64-bit operations
+ */
+#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1
+# undef _LARGEFILE64_SOURCE
+#endif
+
+#ifndef Z_HAVE_UNISTD_H
+# ifdef __WATCOMC__
+# define Z_HAVE_UNISTD_H
+# endif
+#endif
+#ifndef Z_HAVE_UNISTD_H
+# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32)
+# define Z_HAVE_UNISTD_H
+# endif
+#endif
+#ifndef Z_SOLO
+# if defined(Z_HAVE_UNISTD_H)
+# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */
+# ifdef VMS
+# include <unixio.h> /* for off_t */
+# endif
+# ifndef z_off_t
+# define z_off_t off_t
+# endif
+# endif
+#endif
+
+#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0
+# define Z_LFS64
+#endif
+
+#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64)
+# define Z_LARGE64
+#endif
+
+#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64)
+# define Z_WANT64
+#endif
+
+#if !defined(SEEK_SET) && !defined(Z_SOLO)
+# define SEEK_SET 0 /* Seek from beginning of file. */
+# define SEEK_CUR 1 /* Seek from current position.
*/ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/c-blosc/internal-complibs/zlib-1.3.1/zlib.h b/c-blosc/internal-complibs/zlib-1.3.1/zlib.h new file mode 100644 index 0000000..8d4b932 --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/zlib.h @@ -0,0 +1,1938 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.3.1, January 22nd, 2024 + + Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.3.1" +#define ZLIB_VERNUM 0x1310 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 3 +#define ZLIB_VER_REVISION 1 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. 
+ + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size); +typedef void (*free_func)(voidpf opaque, voidpf address); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. 
The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. + */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion(void); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. 
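+
+   As an illustrative sketch (an editorial addition, not upstream text),
+   an application could perform the same first-character check by hand:
+
+     if (zlibVersion()[0] != ZLIB_VERSION[0])
+         fprintf(stderr, "incompatible zlib library\n");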
+ */ + +/* +ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. total_in, total_out, adler, and msg are initialized. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more output + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. 
This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six when the flush marker begins, in order to avoid + repeated flush markers upon calling deflate() again when avail_out == 0. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. 
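+
+   As a minimal sketch of this calling convention (an editorial addition;
+   CHUNK, in, in_len, consume(), and handle_error() are hypothetical
+   names, and error handling is abbreviated), compressing one complete
+   input buffer with Z_FINISH looks like:
+
+     unsigned char out[CHUNK];
+     z_stream strm;
+     strm.zalloc = Z_NULL;
+     strm.zfree  = Z_NULL;
+     strm.opaque = Z_NULL;
+     if (deflateInit(&strm, Z_DEFAULT_COMPRESSION) != Z_OK)
+         handle_error();
+     strm.next_in  = in;
+     strm.avail_in = in_len;
+     do {
+         strm.next_out  = out;
+         strm.avail_out = CHUNK;
+         deflate(&strm, Z_FINISH);
+         consume(out, CHUNK - strm.avail_out);
+     } while (strm.avail_out == 0);
+     deflateEnd(&strm);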
+ + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit(z_streamp strm); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. total_in, total_out, adler, and + msg are initialized. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). 
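+
+   A minimal sketch of this loop (an editorial addition; CHUNK,
+   source_bytes, source_len, use_output(), and handle_error() are
+   hypothetical names, and a real caller would distinguish Z_STREAM_END
+   from the error codes described below):
+
+     unsigned char out[CHUNK];
+     int ret;
+     z_stream strm;
+     strm.zalloc = Z_NULL;
+     strm.zfree  = Z_NULL;
+     strm.opaque = Z_NULL;
+     strm.next_in  = source_bytes;
+     strm.avail_in = source_len;
+     if (inflateInit(&strm) != Z_OK)
+         handle_error();
+     do {
+         strm.next_out  = out;
+         strm.avail_out = CHUNK;
+         ret = inflate(&strm, Z_NO_FLUSH);
+         use_output(out, CHUNK - strm.avail_out);
+     } while (ret == Z_OK);
+     inflateEnd(&strm);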
+ + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. 
If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2(z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy); + + This is another version of deflateInit with more compression options. 
The + fields zalloc, zfree and opaque must be initialized before by the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. 
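+
+   For example (an editorial sketch, not upstream text), requesting a
+   gzip wrapper with the default window and memory settings:
+
+     ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+                        15 + 16, 8, Z_DEFAULT_STRATEGY);
+
+   where 15 + 16 selects a 32K window plus the gzip header and trailer
+   described above, and 8 is the default memLevel.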
+ + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. 
+ + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset(z_streamp strm); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. total_in, total_out, adler, and msg are initialized. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams(z_streamp strm, + int level, + int strategy); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if there have been any deflate() calls since the + state was initialized or reset, then the input available so far is + compressed with the old level and strategy using deflate(strm, Z_BLOCK). + There are three approaches for the compression levels 0, 1..3, and 4..9 + respectively. The new level and strategy will take effect at the next call + of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. + + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the data compressed after deflateParams(). 
+ + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune(z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm, + uLong sourceLen); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending(z_streamp strm, + unsigned *pending, + int *bits); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime(z_streamp strm, + int bits, + int value); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. 
+*/ + +ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm, + gz_headerp head); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to the current operating system, with no + extra, name, or comment fields. The gzip header is returned to the default + state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2(z_streamp strm, + int windowBits); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. 
Unlike the gunzip utility and gzread() (see + below), inflate() will *not* automatically decode concatenated gzip members. + inflate() will return Z_STREAM_END at the end of the gzip member. The state + would need to be reset to continue decoding a subsequent gzip member. This + *must* be done if there is more data after a gzip member, in order for the + decompression to be compliant with the gzip standard (RFC 1952). + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync(z_streamp strm); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. 
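+
+   A sketch of the retry loop described below (an editorial addition;
+   buf and refill() are hypothetical names):
+
+     int ret = inflateSync(&strm);
+     while (ret == Z_BUF_ERROR) {
+         strm.next_in  = buf;
+         strm.avail_in = refill(buf, sizeof buf);
+         if (strm.avail_in == 0)
+             break;
+         ret = inflateSync(&strm);
+     }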
+ + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of total_in + which indicates where valid compressed data was found. In the error case, + the application may repeatedly call inflateSync, providing more input each + time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset(z_streamp strm); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + total_in, total_out, adler, and msg are initialized. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2(z_streamp strm, + int windowBits); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime(z_streamp strm, + int bits, + int value); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark(z_streamp strm); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. 
+ If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm, + gz_headerp head); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. 
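+
+     A minimal sketch of retrieving the original file name, if present (the
+   256-byte buffer is an arbitrary choice here; error handling is elided):
+
+       gz_header head;
+       unsigned char name[256];
+
+       head.extra = Z_NULL;
+       head.name = name;
+       head.name_max = sizeof(name);
+       head.comment = Z_NULL;
+       (void)inflateGetHeader(strm, &head);
+       /* call inflate(strm, Z_BLOCK) until head.done is nonzero; the
+          zero-terminated name, if any, is then in name[] */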
+*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits, + unsigned char FAR *window); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func)(void FAR *, + z_const unsigned char FAR * FAR *); +typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned); + +ZEXTERN int ZEXPORT inflateBack(z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. 
Neither in() nor
+   out() are permitted to change the contents of the window provided to
+   inflateBackInit(), which is also the buffer that out() uses to write from.
+   The length written by out() will be at most the window size.  Any non-zero
+   amount of input may be provided by in().
+
+     For convenience, inflateBack() can be provided input on the first call by
+   setting strm->next_in and strm->avail_in.  If that input is exhausted, then
+   in() will be called.  Therefore strm->next_in must be initialized before
+   calling inflateBack().  If strm->next_in is Z_NULL, then in() will be called
+   immediately for input.  If strm->next_in is not Z_NULL, then strm->avail_in
+   must also be initialized, and then if strm->avail_in is not zero, input will
+   initially be taken from strm->next_in[0 .. strm->avail_in - 1].
+
+     The in_desc and out_desc parameters of inflateBack() are passed as the
+   first parameter of in() and out() respectively when they are called.  These
+   descriptors can be optionally used to pass any information that the caller-
+   supplied in() and out() functions need to do their job.
+
+     On return, inflateBack() will set strm->next_in and strm->avail_in to
+   pass back any unused input that was provided by the last in() call.  The
+   return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR
+   if in() or out() returned an error, Z_DATA_ERROR if there was a format error
+   in the deflate stream (in which case strm->msg is set to indicate the nature
+   of the error), or Z_STREAM_ERROR if the stream was not properly initialized.
+   In the case of Z_BUF_ERROR, an input or output error can be distinguished
+   using strm->next_in which will be Z_NULL only if in() returned an error.  If
+   strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
+   non-zero.  (in() will always be called before out(), so strm->next_in is
+   assured to be defined if out() returns non-zero.)  Note that inflateBack()
+   cannot return Z_OK.
+*/
+
+ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm);
+/*
+     All memory allocated by inflateBackInit() is freed.
+
+     inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream
+   state was inconsistent.
+*/
+
+ZEXTERN uLong ZEXPORT zlibCompileFlags(void);
+/* Return flags indicating compile-time options.
+ + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. 
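+
+     A short sketch of that pattern (illustrative only; the malloc() result
+   should be checked and freed by the caller, and use_compressed() is a
+   placeholder):
+
+       uLong destLen = compressBound(sourceLen);
+       Bytef *dest = (Bytef *)malloc(destLen);
+
+       if (dest != Z_NULL &&
+           compress2(dest, &destLen, source, sourceLen, Z_BEST_SPEED) == Z_OK)
+           use_compressed(dest, destLen);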
+*/
+
+ZEXTERN int ZEXPORT uncompress(Bytef *dest, uLongf *destLen,
+                               const Bytef *source, uLong sourceLen);
+/*
+     Decompresses the source buffer into the destination buffer.  sourceLen is
+   the byte length of the source buffer.  Upon entry, destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data.  (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit, destLen
+   is the actual size of the uncompressed data.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
+   enough memory, Z_BUF_ERROR if there was not enough room in the output
+   buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete.  In
+   the case where there is not enough room, uncompress() will fill the output
+   buffer with the uncompressed data up to that point.
+*/
+
+ZEXTERN int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen,
+                                const Bytef *source, uLong *sourceLen);
+/*
+     Same as uncompress, except that sourceLen is a pointer, where the
+   length of the source is *sourceLen.  On return, *sourceLen is the number of
+   source bytes consumed.
+*/
+
+                        /* gzip file access functions */
+
+/*
+     This library supports reading and writing files in gzip (.gz) format with
+   an interface similar to that of stdio, using the functions that start with
+   "gz".  The gzip format is different from the zlib format.  gzip is a
+   wrapper, documented in RFC 1952, around a deflate stream.
+*/
+
+typedef struct gzFile_s *gzFile;    /* semi-opaque gzip file descriptor */
+
+/*
+ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode);
+
+     Open the gzip (.gz) file at path for reading and decompressing, or
+   compressing and writing.  The mode parameter is as in fopen ("rb" or "wb")
+   but can also include a compression level ("wb9") or a strategy: 'f' for
+   filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h",
+   'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression
+   as in "wb9F".  (See the description of deflateInit2 for more information
+   about the strategy parameter.)  'T' will request transparent writing or
+   appending with no compression and not using the gzip format.
+
+     "a" can be used instead of "w" to request that the gzip stream that will
+   be written be appended to the file.  "+" will result in an error, since
+   reading and writing to the same gzip file is not supported.  The addition of
+   "x" when writing will create the file exclusively, which fails if the file
+   already exists.  On systems that support it, the addition of "e" when
+   reading or writing will set the flag to close the file on an execve() call.
+
+     These functions, as well as gzip, will read and decode a sequence of gzip
+   streams in a file.  The append function of gzopen() can be used to create
+   such a file.  (Also see gzflush() for another way to do this.)  When
+   appending, gzopen does not test whether the file begins with a gzip stream,
+   nor does it look for the end of the gzip streams to begin appending.  gzopen
+   will simply append a gzip stream to the existing file.
+
+     gzopen can be used to read a file which is not in gzip format; in this
+   case gzread will directly read from the file without decompression.  When
+   reading, this will be detected automatically by looking for the magic two-
+   byte gzip header.
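+
+     For instance (a sketch; the file name is a placeholder), a reader need
+   not know in advance whether its input is compressed:
+
+       gzFile in = gzopen("maybe.gz", "rb");
+       /* gzread() then inflates gzip data, or passes other data through */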
+ + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode); +/* + Associate a gzFile with the file descriptor fd. File descriptors are + obtained from calls like open, dup, creat, pipe or fileno (if the file has + been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size); +/* + Set the internal buffer size used by this library's functions for file to + size. The default buffer size is 8192 bytes. This function must be called + after gzopen() or gzdopen(), and before any other calls that read or write + the file. The buffer memory allocation is always deferred to the first read + or write. Three times that size in buffer space is allocated. A larger + buffer size of, for example, 64K or 128K bytes will noticeably increase the + speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy); +/* + Dynamically update the compression level and strategy for file. See the + description of deflateInit2 for the meaning of these parameters. Previously + provided data is flushed before applying the parameter changes. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. +*/ + +ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len); +/* + Read and decompress up to len uncompressed bytes from file into buf. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). 
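+
+     A typical read loop might look like this (a sketch; process() is a
+   placeholder, and error handling is abbreviated):
+
+       char buf[16384];
+       int n;
+
+       while ((n = gzread(file, buf, sizeof(buf))) > 0)
+           process(buf, n);
+       if (n == -1) {
+           /* consult gzerror(file, &errnum) for the cause */
+       }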
+ + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, + gzFile file); +/* + Read and decompress up to nitems items of size size from file into buf, + otherwise operating as gzread() does. This duplicates the interface of + stdio's fread(), with size_t request and return types. If the library + defines size_t, then z_size_t is identical to size_t. If not, then z_size_t + is an unsigned integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevertheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, resetting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len); +/* + Compress and write the len uncompressed bytes at buf to file. gzwrite + returns the number of uncompressed bytes written or 0 in case of error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, + z_size_t nitems, gzFile file); +/* + Compress and write nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. 
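+
+     A brief sketch of writing an array of fixed-size records (the names and
+   sizes here are illustrative only):
+
+       double samples[100];
+       /* ... fill samples ... */
+       if (gzfwrite(samples, sizeof(double), 100, file) != 100) {
+           /* short write; see gzerror(file, &errnum) */
+       }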
+*/ + +ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...); +/* + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf(), + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s); +/* + Compress and write the given null-terminated string s to file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len); +/* + Read and decompress bytes from file into buf, until len-1 characters are + read, or until a newline character is read and transferred to buf, or an + end-of-file condition is encountered. If any characters are read or if len + is one, the string is terminated with a null character. If no characters + are read due to an end-of-file or len is less than one, then the buffer is + left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc(gzFile file, int c); +/* + Compress and write c, converted to an unsigned char, into file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc(gzFile file); +/* + Read and decompress one byte from file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc(int c, gzFile file); +/* + Push c back onto the stream for file to be read as the first character on + the next read. At least one character of push-back is always allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush(gzFile file, int flush); +/* + Flush all pending output to file. The parameter flush is as in the + deflate() function. The return value is the zlib error number (see function + gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. 
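+
+     For example (a sketch), completing a gzip member after each record so
+   that a concurrent reader only ever sees whole records (record and reclen
+   are placeholders):
+
+       gzwrite(file, record, reclen);
+       gzflush(file, Z_FINISH);    /* the next gzwrite starts a new stream */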
+ + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek(gzFile file, + z_off_t offset, int whence); + + Set the starting position to offset relative to whence for the next gzread + or gzwrite on file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind(gzFile file); +/* + Rewind file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell(gzFile file); + + Return the starting position for the next gzread or gzwrite on file. + This position represents a number of bytes in the uncompressed data stream, + and is zero when starting, even if appending or reading a gzip stream from + the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file); + + Return the current compressed (actual) read or write offset of file. This + offset includes the count of bytes that precede the gzip stream, for example + when appending or when using gzdopen() for reading. When reading, the + offset does not include as yet unused buffered input. This information can + be used for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof(gzFile file); +/* + Return true (1) if the end-of-file indicator for file has been set while + reading, false (0) otherwise. Note that the end-of-file indicator is set + only if the read tried to go past the end of the input, but came up short. + Therefore, just like feof(), gzeof() may return false even if there is no + more data to read, in the event that the last read request was for the exact + number of bytes remaining in the input file. This will happen if the input + file size is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect(gzFile file); +/* + Return true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. 
Transparent writing must be
+   explicitly requested, so the application already knows the answer.  When
+   linking statically, using gzdirect() will include all of the zlib code for
+   gzip file reading and decompression, which may not be desired.)
+*/
+
+ZEXTERN int ZEXPORT gzclose(gzFile file);
+/*
+     Flush all pending output for file, if necessary, close file and
+   deallocate the (de)compression state.  Note that once file is closed, you
+   cannot call gzerror with file, since its structures have been deallocated.
+   gzclose must not be called more than once on the same file, just as free
+   must not be called more than once on the same allocation.
+
+     gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a
+   file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the
+   last read ended in the middle of a gzip stream, or Z_OK on success.
+*/
+
+ZEXTERN int ZEXPORT gzclose_r(gzFile file);
+ZEXTERN int ZEXPORT gzclose_w(gzFile file);
+/*
+     Same as gzclose(), but gzclose_r() is only for use when reading, and
+   gzclose_w() is only for use when writing or appending.  The advantage to
+   using these instead of gzclose() is that they avoid linking in zlib
+   compression or decompression code that is not used when only reading or only
+   writing respectively.  If gzclose() is used, then both compression and
+   decompression code will be included in the application when linking to a
+   static zlib library.
+*/
+
+ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum);
+/*
+     Return the error message for the last error which occurred on file.
+   errnum is set to the zlib error number.  If an error occurred in the file
+   system and not in the compression library, errnum is set to Z_ERRNO and the
+   application may consult errno to get the exact error code.
+
+     The application must not modify the returned string.  Future calls to
+   this function may invalidate the previously returned string.  If file is
+   closed, then the string previously returned by gzerror will no longer be
+   available.
+
+     gzerror() should be used to distinguish errors from end-of-file for those
+   functions above that do not distinguish those cases in their return values.
+*/
+
+ZEXTERN void ZEXPORT gzclearerr(gzFile file);
+/*
+     Clear the error and end-of-file flags for file.  This is analogous to the
+   clearerr() function in stdio.  This is useful for continuing to read a gzip
+   file that is being written concurrently.
+*/
+
+#endif /* !Z_SOLO */
+
+                        /* checksum functions */
+
+/*
+     These functions are not related to compression but are exported
+   anyway because they might be useful in applications using the compression
+   library.
+*/
+
+ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len);
+/*
+     Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+   return the updated checksum.  An Adler-32 value is in the range of a 32-bit
+   unsigned integer.  If buf is Z_NULL, this function returns the required
+   initial value for the checksum.
+
+     An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed
+   much faster.
+
+   Usage example:
+
+     uLong adler = adler32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       adler = adler32(adler, buffer, length);
+     }
+     if (adler != original_adler) error();
+*/
+
+ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf,
+                                z_size_t len);
+/*
+     Same as adler32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2,
+                                      z_off_t len2);
+
+     Combine two Adler-32 checksums into one.
For two sequences of bytes, seq1
+   and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for
+   each, adler1 and adler2.  adler32_combine() returns the Adler-32 checksum of
+   seq1 and seq2 concatenated, requiring only adler1, adler2, and len2.  Note
+   that the z_off_t type (like off_t) is a signed integer.  If len2 is
+   negative, the result has no meaning or utility.
+*/
+
+ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len);
+/*
+     Update a running CRC-32 with the bytes buf[0..len-1] and return the
+   updated CRC-32.  A CRC-32 value is in the range of a 32-bit unsigned
+   integer.  If buf is Z_NULL, this function returns the required initial
+   value for the crc.  Pre- and post-conditioning (one's complement) is
+   performed within this function so it shouldn't be done by the application.
+
+   Usage example:
+
+     uLong crc = crc32(0L, Z_NULL, 0);
+
+     while (read_buffer(buffer, length) != EOF) {
+       crc = crc32(crc, buffer, length);
+     }
+     if (crc != original_crc) error();
+*/
+
+ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf,
+                              z_size_t len);
+/*
+     Same as crc32(), but with a size_t length.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2);
+
+     Combine two CRC-32 check values into one.  For two sequences of bytes,
+   seq1 and seq2 with lengths len1 and len2, CRC-32 check values were
+   calculated for each, crc1 and crc2.  crc32_combine() returns the CRC-32
+   check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and
+   len2.  len2 must be non-negative.
+*/
+
+/*
+ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2);
+
+     Return the operator corresponding to length len2, to be used with
+   crc32_combine_op().  len2 must be non-negative.
+*/
+
+ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op);
+/*
+     Give the same result as crc32_combine(), using op in place of len2.  op
+   is generated from len2 by crc32_combine_gen().  This will be faster than
+   crc32_combine() if the generated op is used more than once.
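+
+     For example (a sketch), combining checksums of several equal-length
+   pieces can reuse one generated operator (crc2a and crc2b are placeholder
+   names for the piece CRCs):
+
+       uLong op = crc32_combine_gen(len2);
+
+       crc = crc32_combine_op(crc, crc2a, op);
+       crc = crc32_combine_op(crc, crc2b, op);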
+*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateInit_(z_streamp strm, + const char *version, int stream_size); +ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size); +ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_(gzFile file); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? 
((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int); + ZEXTERN z_off64_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# define z_crc32_combine_gen z_crc32_combine_gen64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# define crc32_combine_gen crc32_combine_gen64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError(int); +ZEXTERN int ZEXPORT inflateSyncPoint(z_streamp); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void); +ZEXTERN int ZEXPORT inflateUndermine(z_streamp, int); +ZEXTERN int ZEXPORT inflateValidate(z_streamp, int); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed(z_streamp); +ZEXTERN int ZEXPORT inflateResetKeep(z_streamp); +ZEXTERN int ZEXPORT deflateResetKeep(z_streamp); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w(const wchar_t *path, + const char *mode); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf(gzFile file, + const char *format, + va_list va); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/c-blosc/internal-complibs/zlib-1.3.1/zutil.c b/c-blosc/internal-complibs/zlib-1.3.1/zutil.c new file mode 100644 index 0000000..b1c5d2d --- /dev/null +++ b/c-blosc/internal-complibs/zlib-1.3.1/zutil.c @@ -0,0 +1,299 @@ +/* 
zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995-2017 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* @(#) $Id$ */
+
+#include "zutil.h"
+#ifndef Z_SOLO
+#  include "gzguts.h"
+#endif
+
+z_const char * const z_errmsg[10] = {
+    (z_const char *)"need dictionary",     /* Z_NEED_DICT       2  */
+    (z_const char *)"stream end",          /* Z_STREAM_END      1  */
+    (z_const char *)"",                    /* Z_OK              0  */
+    (z_const char *)"file error",          /* Z_ERRNO         (-1) */
+    (z_const char *)"stream error",        /* Z_STREAM_ERROR  (-2) */
+    (z_const char *)"data error",          /* Z_DATA_ERROR    (-3) */
+    (z_const char *)"insufficient memory", /* Z_MEM_ERROR     (-4) */
+    (z_const char *)"buffer error",        /* Z_BUF_ERROR     (-5) */
+    (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */
+    (z_const char *)""
+};
+
+
+const char * ZEXPORT zlibVersion(void) {
+    return ZLIB_VERSION;
+}
+
+uLong ZEXPORT zlibCompileFlags(void) {
+    uLong flags;
+
+    flags = 0;
+    switch ((int)(sizeof(uInt))) {
+    case 2:     break;
+    case 4:     flags += 1;     break;
+    case 8:     flags += 2;     break;
+    default:    flags += 3;
+    }
+    switch ((int)(sizeof(uLong))) {
+    case 2:     break;
+    case 4:     flags += 1 << 2;        break;
+    case 8:     flags += 2 << 2;        break;
+    default:    flags += 3 << 2;
+    }
+    switch ((int)(sizeof(voidpf))) {
+    case 2:     break;
+    case 4:     flags += 1 << 4;        break;
+    case 8:     flags += 2 << 4;        break;
+    default:    flags += 3 << 4;
+    }
+    switch ((int)(sizeof(z_off_t))) {
+    case 2:     break;
+    case 4:     flags += 1 << 6;        break;
+    case 8:     flags += 2 << 6;        break;
+    default:    flags += 3 << 6;
+    }
+#ifdef ZLIB_DEBUG
+    flags += 1 << 8;
+#endif
+    /*
+#if defined(ASMV) || defined(ASMINF)
+    flags += 1 << 9;
+#endif
+     */
+#ifdef ZLIB_WINAPI
+    flags += 1 << 10;
+#endif
+#ifdef BUILDFIXED
+    flags += 1 << 12;
+#endif
+#ifdef DYNAMIC_CRC_TABLE
+    flags += 1 << 13;
+#endif
+#ifdef NO_GZCOMPRESS
+    flags += 1L << 16;
+#endif
+#ifdef NO_GZIP
+    flags += 1L << 17;
+#endif
+#ifdef PKZIP_BUG_WORKAROUND
+    flags += 1L << 20;
+#endif
+#ifdef FASTEST
+    flags += 1L << 21;
+#endif
+#if defined(STDC) || defined(Z_HAVE_STDARG_H)
+#  ifdef NO_vsnprintf
+    flags += 1L << 25;
+#    ifdef HAS_vsprintf_void
+    flags += 1L << 26;
+#    endif
+#  else
+#    ifdef HAS_vsnprintf_void
+    flags += 1L << 26;
+#    endif
+#  endif
+#else
+    flags += 1L << 24;
+#  ifdef NO_snprintf
+    flags += 1L << 25;
+#    ifdef HAS_sprintf_void
+    flags += 1L << 26;
+#    endif
+#  else
+#    ifdef HAS_snprintf_void
+    flags += 1L << 26;
+#    endif
+#  endif
+#endif
+    return flags;
+}
+
+#ifdef ZLIB_DEBUG
+#include <stdio.h>
+#  ifndef verbose
+#    define verbose 0
+#  endif
+int ZLIB_INTERNAL z_verbose = verbose;
+
+void ZLIB_INTERNAL z_error(char *m) {
+    fprintf(stderr, "%s\n", m);
+    exit(1);
+}
+#endif
+
+/* exported to allow conversion of error code to string for compress() and
+ * uncompress()
+ */
+const char * ZEXPORT zError(int err) {
+    return ERR_MSG(err);
+}
+
+#if defined(_WIN32_WCE) && _WIN32_WCE < 0x800
+    /* The older Microsoft C Run-Time Library for Windows CE doesn't have
+     * errno.  We define it as a global variable to simplify porting.
+     * Its value is always 0 and should not be used.
+     */
+    int errno = 0;
+#endif
+
+#ifndef HAVE_MEMCPY
+
+void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len) {
+    if (len == 0) return;
+    do {
+        *dest++ = *source++; /* ???
to be unrolled */ + } while (--len != 0); +} + +int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len) { + uInt j; + + for (j = 0; j < len; j++) { + if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; + } + return 0; +} + +void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len) { + if (len == 0) return; + do { + *dest++ = 0; /* ??? to be unrolled */ + } while (--len != 0); +} +#endif + +#ifndef Z_SOLO + +#ifdef SYS16BIT + +#ifdef __TURBOC__ +/* Turbo C in 16-bit mode */ + +# define MY_ZCALLOC + +/* Turbo C malloc() does not allow dynamic allocation of 64K bytes + * and farmalloc(64K) returns a pointer with an offset of 8, so we + * must fix the pointer. Warning: the pointer must be put back to its + * original form in order to free it, use zcfree(). + */ + +#define MAX_PTR 10 +/* 10*64K = 640K */ + +local int next_ptr = 0; + +typedef struct ptr_table_s { + voidpf org_ptr; + voidpf new_ptr; +} ptr_table; + +local ptr_table table[MAX_PTR]; +/* This table is used to remember the original form of pointers + * to large buffers (64K). Such pointers are normalized with a zero offset. + * Since MSDOS is not a preemptive multitasking OS, this table is not + * protected from concurrent access. This hack doesn't work anyway on + * a protected system like OS/2. Use Microsoft C instead. + */ + +voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) { + voidpf buf; + ulg bsize = (ulg)items*size; + + (void)opaque; + + /* If we allocate less than 65520 bytes, we assume that farmalloc + * will return a usable pointer which doesn't have to be normalized. + */ + if (bsize < 65520L) { + buf = farmalloc(bsize); + if (*(ush*)&buf != 0) return buf; + } else { + buf = farmalloc(bsize + 16L); + } + if (buf == NULL || next_ptr >= MAX_PTR) return NULL; + table[next_ptr].org_ptr = buf; + + /* Normalize the pointer to seg:0 */ + *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; + *(ush*)&buf = 0; + table[next_ptr++].new_ptr = buf; + return buf; +} + +void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) { + int n; + + (void)opaque; + + if (*(ush*)&ptr != 0) { /* object < 64K */ + farfree(ptr); + return; + } + /* Find the original pointer */ + for (n = 0; n < next_ptr; n++) { + if (ptr != table[n].new_ptr) continue; + + farfree(table[n].org_ptr); + while (++n < next_ptr) { + table[n-1] = table[n]; + } + next_ptr--; + return; + } + Assert(0, "zcfree: ptr not found"); +} + +#endif /* __TURBOC__ */ + + +#ifdef M_I86 +/* Microsoft C in 16-bit mode */ + +# define MY_ZCALLOC + +#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) +# define _halloc halloc +# define _hfree hfree +#endif + +voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, uInt items, uInt size) { + (void)opaque; + return _halloc((long)items, size); +} + +void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) { + (void)opaque; + _hfree(ptr); +} + +#endif /* M_I86 */ + +#endif /* SYS16BIT */ + + +#ifndef MY_ZCALLOC /* Any system without a special alloc function */ + +#ifndef STDC +extern voidp malloc(uInt size); +extern voidp calloc(uInt items, uInt size); +extern void free(voidpf ptr); +#endif + +voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items, unsigned size) { + (void)opaque; + return sizeof(uInt) > 2 ? 
(voidpf)malloc(items * size) :
+                              (voidpf)calloc(items, size);
+}
+
+void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr) {
+    (void)opaque;
+    free(ptr);
+}
+
+#endif /* MY_ZCALLOC */
+
+#endif /* !Z_SOLO */
diff --git a/c-blosc/internal-complibs/zlib-1.3.1/zutil.h b/c-blosc/internal-complibs/zlib-1.3.1/zutil.h
new file mode 100644
index 0000000..48dd7fe
--- /dev/null
+++ b/c-blosc/internal-complibs/zlib-1.3.1/zutil.h
@@ -0,0 +1,254 @@
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+   part of the implementation of the compression library and is
+   subject to change. Applications should only use zlib.h.
+ */
+
+/* @(#) $Id$ */
+
+#ifndef ZUTIL_H
+#define ZUTIL_H
+
+#ifdef HAVE_HIDDEN
+#  define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
+#else
+#  define ZLIB_INTERNAL
+#endif
+
+#include "zlib.h"
+
+#if defined(STDC) && !defined(Z_SOLO)
+#  if !(defined(_WIN32_WCE) && defined(_MSC_VER))
+#    include <stddef.h>
+#  endif
+#  include <string.h>
+#  include <stdlib.h>
+#endif
+
+#ifndef local
+#  define local static
+#endif
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
+
+typedef unsigned char  uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long  ulg;
+
+#if !defined(Z_U8) && !defined(Z_SOLO) && defined(STDC)
+#  include <limits.h>
+#  if (ULONG_MAX == 0xffffffffffffffff)
+#    define Z_U8 unsigned long
+#  elif (ULLONG_MAX == 0xffffffffffffffff)
+#    define Z_U8 unsigned long long
+#  elif (UINT_MAX == 0xffffffffffffffff)
+#    define Z_U8 unsigned
+#  endif
+#endif
+
+extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
+/* (size given to avoid silly warnings with Visual C++) */
+
+#define ERR_MSG(err) z_errmsg[(err) < -6 || (err) > 2 ? 9 : 2 - (err)]
+
+#define ERR_RETURN(strm,err) \
+  return (strm->msg = ERR_MSG(err), (err))
+/* To be used only when the state is known to be valid */
+
+        /* common constants */
+
+#ifndef DEF_WBITS
+#  define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression.
MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+#  define DEF_MEM_LEVEL 8
+#else
+#  define DEF_MEM_LEVEL  MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES    2
+/* The three kinds of block type */
+
+#define MIN_MATCH  3
+#define MAX_MATCH  258
+/* The minimum and maximum match lengths */
+
+#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */
+
+        /* target dependencies */
+
+#if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32))
+#  define OS_CODE  0x00
+#  ifndef Z_SOLO
+#    if defined(__TURBOC__) || defined(__BORLANDC__)
+#      if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__))
+         /* Allow compilation with ANSI keywords only enabled */
+         void _Cdecl farfree( void *block );
+         void *_Cdecl farmalloc( unsigned long nbytes );
+#      else
+#        include <alloc.h>
+#      endif
+#    else /* MSC or DJGPP */
+#      include <malloc.h>
+#    endif
+#  endif
+#endif
+
+#ifdef AMIGA
+#  define OS_CODE  1
+#endif
+
+#if defined(VAXC) || defined(VMS)
+#  define OS_CODE  2
+#  define F_OPEN(name, mode) \
+     fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
+#endif
+
+#ifdef __370__
+#  if __TARGET_LIB__ < 0x20000000
+#    define OS_CODE 4
+#  elif __TARGET_LIB__ < 0x40000000
+#    define OS_CODE 11
+#  else
+#    define OS_CODE 8
+#  endif
+#endif
+
+#if defined(ATARI) || defined(atarist)
+#  define OS_CODE  5
+#endif
+
+#ifdef OS2
+#  define OS_CODE  6
+#  if defined(M_I86) && !defined(Z_SOLO)
+#    include <malloc.h>
+#  endif
+#endif
+
+#if defined(MACOS)
+#  define OS_CODE  7
+#endif
+
+#ifdef __acorn
+#  define OS_CODE 13
+#endif
+
+#if defined(WIN32) && !defined(__CYGWIN__)
+#  define OS_CODE  10
+#endif
+
+#ifdef _BEOS_
+#  define OS_CODE  16
+#endif
+
+#ifdef __TOS_OS400__
+#  define OS_CODE 18
+#endif
+
+#ifdef __APPLE__
+#  define OS_CODE 19
+#endif
+
+#if defined(__BORLANDC__) && !defined(MSDOS)
+  #pragma warn -8004
+  #pragma warn -8008
+  #pragma warn -8066
+#endif
+
+/* provide prototypes for these when building zlib without LFS */
+#if !defined(_WIN32) && \
+    (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0)
+    ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t);
+    ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t);
+#endif
+
+        /* common defaults */
+
+#ifndef OS_CODE
+#  define OS_CODE  3     /* assume Unix */
+#endif
+
+#ifndef F_OPEN
+#  define F_OPEN(name, mode) fopen((name), (mode))
+#endif
+
+         /* functions */
+
+#if defined(pyr) || defined(Z_SOLO)
+#  define NO_MEMCPY
+#endif
+#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__)
+ /* Use our own functions for small and medium model with MSC <= 5.0.
+  * You may have to use the same strategy for Borland C (untested).
+  * The __SC__ check is for Symantec.
+  */
+#  define NO_MEMCPY
+#endif
+#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY)
+#  define HAVE_MEMCPY
+#endif
+#ifdef HAVE_MEMCPY
+#  ifdef SMALL_MEDIUM /* MSDOS small or medium model */
+#    define zmemcpy _fmemcpy
+#    define zmemcmp _fmemcmp
+#    define zmemzero(dest, len) _fmemset(dest, 0, len)
+#  else
+#    define zmemcpy memcpy
+#    define zmemcmp memcmp
+#    define zmemzero(dest, len) memset(dest, 0, len)
+#  endif
+#else
+   void ZLIB_INTERNAL zmemcpy(Bytef* dest, const Bytef* source, uInt len);
+   int ZLIB_INTERNAL zmemcmp(const Bytef* s1, const Bytef* s2, uInt len);
+   void ZLIB_INTERNAL zmemzero(Bytef* dest, uInt len);
+#endif
+
+/* Diagnostic functions */
+#ifdef ZLIB_DEBUG
+#  include <stdio.h>
+   extern int ZLIB_INTERNAL z_verbose;
+   extern void ZLIB_INTERNAL z_error(char *m);
+#  define Assert(cond,msg) {if(!(cond)) z_error(msg);}
+#  define Trace(x) {if (z_verbose>=0) fprintf x ;}
+#  define Tracev(x) {if (z_verbose>0) fprintf x ;}
+#  define Tracevv(x) {if (z_verbose>1) fprintf x ;}
+#  define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;}
+#  define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;}
+#else
+#  define Assert(cond,msg)
+#  define Trace(x)
+#  define Tracev(x)
+#  define Tracevv(x)
+#  define Tracec(c,x)
+#  define Tracecv(c,x)
+#endif
+
+#ifndef Z_SOLO
+   voidpf ZLIB_INTERNAL zcalloc(voidpf opaque, unsigned items,
+                                unsigned size);
+   void ZLIB_INTERNAL zcfree(voidpf opaque, voidpf ptr);
+#endif
+
+#define ZALLOC(strm, items, size) \
+           (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr)  (*((strm)->zfree))((strm)->opaque, (voidpf)(addr))
+#define TRY_FREE(s, p) {if (p) ZFREE(s, p);}
+
+/* Reverse the bytes in a 32-bit value */
+#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \
+                    (((q) & 0xff00) << 8) + (((q) & 0xff) << 24))
+
+#endif /* ZUTIL_H */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/allocations.h b/c-blosc/internal-complibs/zstd-1.5.6/common/allocations.h
new file mode 100644
index 0000000..5e89955
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/allocations.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */ + +/* This file provides custom allocation primitives + */ + +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ + +#include "compiler.h" /* MEM_STATIC */ +#define ZSTD_STATIC_LINKING_ONLY +#include "../zstd.h" /* ZSTD_customMem */ + +#ifndef ZSTD_ALLOCATIONS_H +#define ZSTD_ALLOCATIONS_H + +/* custom memory allocation functions */ + +MEM_STATIC void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +MEM_STATIC void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +MEM_STATIC void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} + +#endif /* ZSTD_ALLOCATIONS_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/bits.h b/c-blosc/internal-complibs/zstd-1.5.6/common/bits.h new file mode 100644 index 0000000..def56c4 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/bits.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_BITS_H +#define ZSTD_BITS_H + +#include "mem.h" + +MEM_STATIC unsigned ZSTD_countTrailingZeros32_fallback(U32 val) +{ + assert(val != 0); + { + static const U32 DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3, + 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, + 26, 12, 18, 6, 11, 5, 10, 9}; + return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 27]; + } +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros32(U32 val) +{ + assert(val != 0); +# if defined(_MSC_VER) +# if STATIC_BMI2 == 1 + return (unsigned)_tzcnt_u32(val); +# else + if (val != 0) { + unsigned long r; + _BitScanForward(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)__builtin_ctz(val); +# else + return ZSTD_countTrailingZeros32_fallback(val); +# endif +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros32_fallback(U32 val) { + assert(val != 0); + { + static const U32 DeBruijnClz[32] = {0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31}; + val |= val >> 1; + val |= val >> 2; + val |= val >> 4; + val |= val >> 8; + val |= val >> 16; + return 31 - DeBruijnClz[(val * 0x07C4ACDDU) >> 27]; + } +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros32(U32 val) +{ + assert(val != 0); +# if defined(_MSC_VER) +# if STATIC_BMI2 == 1 + return (unsigned)_lzcnt_u32(val); +# else + if (val != 0) { + unsigned long r; + _BitScanReverse(&r, val); + return (unsigned)(31 - r); + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)__builtin_clz(val); +# else + return ZSTD_countLeadingZeros32_fallback(val); +# endif +} + +MEM_STATIC unsigned ZSTD_countTrailingZeros64(U64 val) +{ + assert(val != 0); +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 == 1 + return (unsigned)_tzcnt_u64(val); +# else + if (val != 0) { + unsigned long r; + _BitScanForward64(&r, val); + return (unsigned)r; + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) && defined(__LP64__) + return (unsigned)__builtin_ctzll(val); +# else + { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (leastSignificantWord == 0) { + return 32 + ZSTD_countTrailingZeros32(mostSignificantWord); + } else { + return ZSTD_countTrailingZeros32(leastSignificantWord); + } + } +# endif +} + +MEM_STATIC unsigned ZSTD_countLeadingZeros64(U64 val) +{ + assert(val != 0); +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 == 1 + return (unsigned)_lzcnt_u64(val); +# else + if (val != 0) { + unsigned long r; + _BitScanReverse64(&r, val); + return (unsigned)(63 - r); + } else { + /* Should not reach this code path */ + __assume(0); + } +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (unsigned)(__builtin_clzll(val)); +# else + { + U32 mostSignificantWord = (U32)(val >> 32); + U32 leastSignificantWord = (U32)val; + if (mostSignificantWord == 0) { + return 32 + ZSTD_countLeadingZeros32(leastSignificantWord); + } else { + return ZSTD_countLeadingZeros32(mostSignificantWord); + } + } +# endif +} + +MEM_STATIC unsigned ZSTD_NbCommonBytes(size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { + return ZSTD_countTrailingZeros64((U64)val) >> 3; + } else { + return ZSTD_countTrailingZeros32((U32)val) >> 3; + } + } else { /* Big Endian CPU */ 
+ if (MEM_64bits()) {
+ return ZSTD_countLeadingZeros64((U64)val) >> 3;
+ } else {
+ return ZSTD_countLeadingZeros32((U32)val) >> 3;
+ }
+ }
+}
+
+MEM_STATIC unsigned ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
+{
+ assert(val != 0);
+ return 31 - ZSTD_countLeadingZeros32(val);
+}
+
+/* ZSTD_rotateRight_*():
+ * Rotates a bitfield to the right by "count" bits.
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
+ */
+MEM_STATIC
+U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
+ assert(count < 64);
+ count &= 0x3F; /* for fickle pattern recognition */
+ return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
+}
+
+MEM_STATIC
+U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
+ assert(count < 32);
+ count &= 0x1F; /* for fickle pattern recognition */
+ return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
+}
+
+MEM_STATIC
+U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
+ assert(count < 16);
+ count &= 0x0F; /* for fickle pattern recognition */
+ return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
+}
+
+#endif /* ZSTD_BITS_H */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/bitstream.h b/c-blosc/internal-complibs/zstd-1.5.6/common/bitstream.h
new file mode 100644
index 0000000..6760449
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/bitstream.h
@@ -0,0 +1,457 @@
+/* ******************************************************************
+ * bitstream
+ * Part of FSE library
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * You can contact the author at :
+ * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+#ifndef BITSTREAM_H_MODULE
+#define BITSTREAM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+/*
+* This API consists of small unitary functions, which must be inlined for best performance.
+* Since link-time-optimization is not available for all compilers,
+* these functions are defined into a .h to be included.
+*/
+
+/*-****************************************
+* Dependencies
+******************************************/
+#include "mem.h" /* unaligned access routines */
+#include "compiler.h" /* UNLIKELY() */
+#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
+#include "error_private.h" /* error codes and messages */
+#include "bits.h" /* ZSTD_highbit32 */
+
+
+/*=========================================
+* Target specific
+=========================================*/
+#ifndef ZSTD_NO_INTRINSICS
+# if (defined(__BMI__) || defined(__BMI2__)) && defined(__GNUC__)
+# include <immintrin.h> /* support for bextr (experimental)/bzhi */
+# elif defined(__ICCARM__)
+# include <intrinsics.h>
+# endif
+#endif
+
+#define STREAM_ACCUMULATOR_MIN_32 25
+#define STREAM_ACCUMULATOR_MIN_64 57
+#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
+
+
+/*-******************************************
+* bitStream encoding API (write forward)
+********************************************/
+/* bitStream can mix input from multiple sources.
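A quick self-check of the De Bruijn fallback defined in bits.h above: a standalone sketch comparing the multiply-and-lookup trick against a naive bit loop (the table and constant are copied from the header; everything else is illustrative):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same table and constant as ZSTD_countTrailingZeros32_fallback above. */
static unsigned ctz32_debruijn(uint32_t val)
{
    static const uint32_t DeBruijnBytePos[32] = {0, 1, 28, 2, 29, 14, 24, 3,
                                                 30, 22, 20, 15, 25, 17, 4, 8,
                                                 31, 27, 13, 23, 21, 19, 16, 7,
                                                 26, 12, 18, 6, 11, 5, 10, 9};
    /* val & -val isolates the lowest set bit; the multiply maps each of the
     * 32 possible isolated bits to a unique top-5-bit pattern. */
    return DeBruijnBytePos[((uint32_t)((val & -(int32_t)val) * 0x077CB531U)) >> 27];
}

static unsigned ctz32_naive(uint32_t val)
{
    unsigned n = 0;
    while ((val & 1) == 0) { val >>= 1; n++; }
    return n;
}

int main(void)
{
    unsigned bit;
    for (bit = 0; bit < 32; bit++) {           /* check every isolated bit */
        uint32_t v = (uint32_t)1 << bit;
        assert(ctz32_debruijn(v) == ctz32_naive(v));
        assert(ctz32_debruijn(v | 0x80000000u) == ctz32_naive(v | 0x80000000u));
    }
    puts("De Bruijn ctz matches naive ctz");
    return 0;
}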
+ * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef size_t BitContainerType; +typedef struct { + BitContainerType bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, /* fully refilled */ + BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */ + BIT_DStream_completed = 2, /* bitstream entirely consumed, bit-exact */ + BIT_DStream_overflow = 3 /* user requested more bits than present in bitstream */ + } BIT_DStream_status; /* result of BIT_reloadDStream() */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). 
+*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS) + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= BIT_getLowerBits(value, nbBits) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. 
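To make the write-forward accumulator discipline concrete, here is a minimal standalone re-implementation of the pattern documented above; it does not use the real BIT_* types, assumes a little-endian host for brevity, and just mirrors the rule that at most 7 bits stay in the register after a flush:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy little-endian bit writer following the BIT_CStream_t discipline:
 * accumulate into a 64-bit register, flush whole bytes, keep the 0-7
 * leftover bits in the register. */
typedef struct { uint64_t acc; unsigned bits; uint8_t buf[16]; size_t pos; } TinyBitC;

static void tiny_addBits(TinyBitC* c, uint64_t value, unsigned nbBits)
{
    c->acc |= (value & ((1ull << nbBits) - 1)) << c->bits;  /* like BIT_addBits */
    c->bits += nbBits;
}

static void tiny_flushBits(TinyBitC* c)
{
    size_t nbBytes = c->bits >> 3;
    memcpy(c->buf + c->pos, &c->acc, 8);   /* little-endian write, like MEM_writeLEST */
    c->pos += nbBytes;
    c->bits &= 7;                          /* at most 7 bits stay behind */
    c->acc >>= nbBytes * 8;
}

int main(void)
{
    TinyBitC c = { 0, 0, {0}, 0 };
    tiny_addBits(&c, 0x5, 3);   /* first field written ... */
    tiny_addBits(&c, 0x9, 4);   /* ... will be the last one read back */
    tiny_flushBits(&c);
    printf("first byte: 0x%02x\n", c.buf[0]);  /* 0x5 | 0x9<<3 = 0x4D */
    return 0;
}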
+ * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + ZSTD_FALLTHROUGH; + + case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + ZSTD_FALLTHROUGH; + + case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + ZSTD_FALLTHROUGH; + + case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24; + ZSTD_FALLTHROUGH; + + case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16; + ZSTD_FALLTHROUGH; + + case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) << 8; + ZSTD_FALLTHROUGH; + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - ZSTD_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better + * than accessing memory. When bmi2 instruction is not present, we consider + * such cpus old (pre-Haswell, 2013) and their performance is not of that + * importance. + */ +#if defined(__x86_64__) || defined(_M_X86) + return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1); +#else + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +#endif +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStream_internal() : + * Simple variant of BIT_reloadDStream(), with two conditions: + * 1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8 + * 2. 
look window is valid after shifted down : bitD->ptr >= bitD->start + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD) +{ + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + assert(bitD->ptr >= bitD->start); + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + return BIT_reloadDStream_internal(bitD); +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not never beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + /* note : once in overflow mode, a bitstream remains in this mode until it's reset */ + if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) { + static const BitContainerType zeroFilled = 0; + bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */ + /* overflow detected, erroneous scenario or end of stream: no update */ + return BIT_DStream_overflow; + } + + assert(bitD->ptr >= bitD->start); + + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStream_internal(bitD); + } + if (bitD->ptr == bitD->start) { + /* reached end of bitStream => no update */ + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr => cautious update */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/compiler.h b/c-blosc/internal-complibs/zstd-1.5.6/common/compiler.h new file mode 100644 index 0000000..31880ec --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/compiler.h @@ -0,0 +1,450 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
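Pairing with the writer sketch earlier: a toy reader that consumes the same byte from the top of the container down, illustrating the reverse (LIFO) read order and the shift+mask extraction that BIT_lookBits/BIT_getMiddleBits perform. Standalone and simplified (one unused top bit instead of a real endMark); not the real BIT_DStream_t:

#include <stdint.h>
#include <stdio.h>

/* Extract nbBits that end 'consumed' bits below the top of a 64-bit
 * container: the same shift+mask BIT_getMiddleBits performs. */
static uint64_t toy_lookBits(uint64_t container, unsigned consumed, unsigned nbBits)
{
    unsigned start = 64 - consumed - nbBits;
    return (container >> start) & ((1ull << nbBits) - 1);
}

int main(void)
{
    /* One byte produced by the writer sketch: 0x5 (3 bits) then 0x9 (4 bits). */
    uint64_t container = (uint64_t)0x4D << 56;  /* last byte loaded at the top */
    unsigned consumed = 1;                      /* skip the unused top bit */
    uint64_t second = toy_lookBits(container, consumed, 4); consumed += 4;
    uint64_t first  = toy_lookBits(container, consumed, 3); consumed += 3;
    printf("read back: second=0x%llx first=0x%llx\n",
           (unsigned long long)second, (unsigned long long)first); /* 0x9, 0x5 */
    return 0;
}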
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPILER_H
+#define ZSTD_COMPILER_H
+
+#include <stddef.h>
+
+#include "portability_macros.h"
+
+/*-*******************************************************
+* Compiler specifics
+*********************************************************/
+/* force inlining */
+
+#if !defined(ZSTD_NO_INLINE)
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# define INLINE_KEYWORD inline
+#else
+# define INLINE_KEYWORD
+#endif
+
+#if defined(__GNUC__) || defined(__ICCARM__)
+# define FORCE_INLINE_ATTR __attribute__((always_inline))
+#elif defined(_MSC_VER)
+# define FORCE_INLINE_ATTR __forceinline
+#else
+# define FORCE_INLINE_ATTR
+#endif
+
+#else
+
+#define INLINE_KEYWORD
+#define FORCE_INLINE_ATTR
+
+#endif
+
+/**
+ On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
+ This explicitly marks such functions as __cdecl so that the code will still compile
+ if a CC other than __cdecl has been made the default.
+*/
+#if defined(_MSC_VER)
+# define WIN_CDECL __cdecl
+#else
+# define WIN_CDECL
+#endif
+
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+# define UNUSED_ATTR __attribute__((unused))
+#else
+# define UNUSED_ATTR
+#endif
+
+/**
+ * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
+ * parameters. They must be inlined for the compiler to eliminate the constant
+ * branches.
+ */
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
+/**
+ * HINT_INLINE is used to help the compiler generate better code. It is *not*
+ * used for "templates", so it can be tweaked based on the compilers
+ * performance.
+ *
+ * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
+ * always_inline attribute.
+ *
+ * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
+ * attribute.
+ */
+#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
+# define HINT_INLINE static INLINE_KEYWORD
+#else
+# define HINT_INLINE FORCE_INLINE_TEMPLATE
+#endif
+
+/* "soft" inline :
+ * The compiler is free to select if it's a good idea to inline or not.
+ * The main objective is to silence compiler warnings
+ * when a defined function in included but not used.
+ *
+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
+ * Updating the prefix is probably preferable, but requires a fairly large codemod,
+ * since this name is used everywhere.
+ */
+#ifndef MEM_STATIC /* already defined in Linux Kernel mem.h */
+#if defined(__GNUC__)
+# define MEM_STATIC static __inline UNUSED_ATTR
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+#endif
+
+/* force no inlining */
+#ifdef _MSC_VER
+# define FORCE_NOINLINE static __declspec(noinline)
+#else
+# if defined(__GNUC__) || defined(__ICCARM__)
+# define FORCE_NOINLINE static __attribute__((__noinline__))
+# else
+# define FORCE_NOINLINE static
+# endif
+#endif
+
+
+/* target attribute */
+#if defined(__GNUC__) || defined(__ICCARM__)
+# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
+#else
+# define TARGET_ATTRIBUTE(target)
+#endif
+
+/* Target attribute for BMI2 dynamic dispatch.
+ * Enable lzcnt, bmi, and bmi2.
+ * We test for bmi1 & bmi2. lzcnt is included in bmi1.
+ */
+#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
+
+/* prefetch
+ * can be disabled, by declaring NO_PREFETCH build macro */
+#if defined(NO_PREFETCH)
+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
+#else
+# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC) /* _mm_prefetch() is not defined outside of x86/x64 */
+# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
+# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
+# elif defined(__aarch64__)
+# define PREFETCH_L1(ptr) do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+# define PREFETCH_L2(ptr) do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
+# else
+# define PREFETCH_L1(ptr) do { (void)(ptr); } while (0) /* disabled */
+# define PREFETCH_L2(ptr) do { (void)(ptr); } while (0) /* disabled */
+# endif
+#endif /* NO_PREFETCH */
+
+#define CACHELINE_SIZE 64
+
+#define PREFETCH_AREA(p, s) \
+ do { \
+ const char* const _ptr = (const char*)(p); \
+ size_t const _size = (size_t)(s); \
+ size_t _pos; \
+ for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+ PREFETCH_L2(_ptr + _pos); \
+ } \
+ } while (0)
+
+/* vectorization
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
+ * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
+# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
+# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
+# else
+# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")")
+# endif
+#else
+# define DONT_VECTORIZE
+#endif
+
+/* Tell the compiler that a branch is likely or unlikely.
+ * Only use these macros if it causes the compiler to generate better code.
+ * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc
+ * and clang, please do.
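A sketch of how the prefetch macros above are meant to be used: warming the next block of a sequential pass while the current one is processed. PREFETCH_AREA walks the buffer in CACHELINE_SIZE (64-byte) steps; the portable stand-in below uses __builtin_prefetch directly and is an illustration, not zstd code:

#include <stddef.h>
#include <stdint.h>

#define TOY_CACHELINE 64

/* Same shape as PREFETCH_AREA above: touch each cache line of [p, p+s). */
static void toy_prefetch_area(const void* p, size_t s)
{
    const char* ptr = (const char*)p;
    size_t pos;
    for (pos = 0; pos < s; pos += TOY_CACHELINE) {
#if defined(__GNUC__)
        __builtin_prefetch(ptr + pos, 0 /* read */, 2 /* L2-ish locality */);
#else
        (void)(ptr + pos); /* no-op fallback, mirroring the disabled branch */
#endif
    }
}

/* Usage: hint the next block before summing the current one. */
uint64_t sum_blocks(const uint8_t* data, size_t nbBlocks, size_t blockSize)
{
    uint64_t total = 0;
    size_t b, i;
    for (b = 0; b < nbBlocks; b++) {
        if (b + 1 < nbBlocks)
            toy_prefetch_area(data + (b + 1) * blockSize, blockSize);
        for (i = 0; i < blockSize; i++)
            total += data[b * blockSize + i];
    }
    return total;
}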
+ */
+#if defined(__GNUC__)
+#define LIKELY(x) (__builtin_expect((x), 1))
+#define UNLIKELY(x) (__builtin_expect((x), 0))
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
+#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+# define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
+#else
+# define ZSTD_UNREACHABLE do { assert(0); } while (0)
+#endif
+
+/* disable warnings */
+#ifdef _MSC_VER /* Visual Studio */
+# include <intrin.h> /* For Visual 2005 */
+# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
+# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
+# pragma warning(disable : 4324) /* disable: C4324: padded structure */
+#endif
+
+/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
+#ifndef STATIC_BMI2
+# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))
+# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2
+# define STATIC_BMI2 1
+# endif
+# elif defined(__BMI2__) && defined(__x86_64__) && defined(__GNUC__)
+# define STATIC_BMI2 1
+# endif
+#endif
+
+#ifndef STATIC_BMI2
+ #define STATIC_BMI2 0
+#endif
+
+/* compile time determination of SIMD support */
+#if !defined(ZSTD_NO_INTRINSICS)
+# if defined(__SSE2__) || defined(_M_AMD64) || (defined (_M_IX86) && defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
+# define ZSTD_ARCH_X86_SSE2
+# endif
+# if defined(__ARM_NEON) || defined(_M_ARM64)
+# define ZSTD_ARCH_ARM_NEON
+# endif
+#
+# if defined(ZSTD_ARCH_X86_SSE2)
+# include <emmintrin.h>
+# elif defined(ZSTD_ARCH_ARM_NEON)
+# include <arm_neon.h>
+# endif
+#endif
+
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+# define ZSTD_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define ZSTD_HAS_C_ATTRIBUTE(x) 0
+#endif
+
+/* Only use C++ attributes in C++. Some compilers report support for C++
+ * attributes when compiling with C.
+ */
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+# define ZSTD_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define ZSTD_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+
+/* Define ZSTD_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute.
+ * - C23: https://en.cppreference.com/w/c/language/attributes/fallthrough
+ * - CPP17: https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * - Else: __attribute__((__fallthrough__))
+ */
+#ifndef ZSTD_FALLTHROUGH
+# if ZSTD_HAS_C_ATTRIBUTE(fallthrough)
+# define ZSTD_FALLTHROUGH [[fallthrough]]
+# elif ZSTD_HAS_CPP_ATTRIBUTE(fallthrough)
+# define ZSTD_FALLTHROUGH [[fallthrough]]
+# elif __has_attribute(__fallthrough__)
+/* Leading semicolon is to satisfy gcc-11 with -pedantic. Without the semicolon
+ * gcc complains about: a label can only be part of a statement and a declaration is not a statement.
+ */
+# define ZSTD_FALLTHROUGH ; __attribute__((__fallthrough__))
+# else
+# define ZSTD_FALLTHROUGH
+# endif
+#endif
+
+/*-**************************************************************
+* Alignment check
+*****************************************************************/
+
+/* this test was initially positioned in mem.h,
+ * but this file is removed (or replaced) for linux kernel
+ * so it's now hosted in compiler.h,
+ * which remains valid for both user & kernel spaces.
+ */
+
+#ifndef ZSTD_ALIGNOF
+# if defined(__GNUC__) || defined(_MSC_VER)
+/* covers gcc, clang & MSVC */
+/* note : this section must come first, before C11,
+ * due to a limitation in the kernel source generator */
+# define ZSTD_ALIGNOF(T) __alignof(T)
+
+# elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)
+/* C11 support */
+# include <stdalign.h>
+# define ZSTD_ALIGNOF(T) alignof(T)
+
+# else
+/* No known support for alignof() - imperfect backup */
+# define ZSTD_ALIGNOF(T) (sizeof(void*) < sizeof(T) ? sizeof(void*) : sizeof(T))
+
+# endif
+#endif /* ZSTD_ALIGNOF */
+
+/*-**************************************************************
+* Sanitizer
+*****************************************************************/
+
+/**
+ * Zstd relies on pointer overflow in its decompressor.
+ * We add this attribute to functions that rely on pointer overflow.
+ */
+#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+# if __has_attribute(no_sanitize)
+# if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
+ /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
+# else
+ /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
+# endif
+# else
+# define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+# endif
+#endif
+
+/**
+ * Helper function to perform a wrapped pointer difference without trigging
+ * UBSAN.
+ *
+ * @returns lhs - rhs with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
+{
+ return lhs - rhs;
+}
+
+/**
+ * Helper function to perform a wrapped pointer add without triggering UBSAN.
+ *
+ * @return ptr + add with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
+{
+ return ptr + add;
+}
+
+/**
+ * Helper function to perform a wrapped pointer subtraction without triggering
+ * UBSAN.
+ *
+ * @return ptr - sub with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
+{
+ return ptr - sub;
+}
+
+/**
+ * Helper function to add to a pointer that works around C's undefined behavior
+ * of adding 0 to NULL.
+ *
+ * @returns `ptr + add` except it defines `NULL + 0 == NULL`.
+ */
+MEM_STATIC
+unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
+{
+ return add > 0 ? ptr + add : ptr;
+}
+
+/* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
+ * abundance of caution, disable our custom poisoning on mingw.
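Of the pointer helpers above, ZSTD_maybeNullPtrAdd addresses a subtle case: forming `NULL + 0` is undefined behavior in C. A short standalone sketch of why the guard matters; the buffer names and scenario are illustrative, not taken from zstd:

#include <stddef.h>
#include <stdio.h>

/* Same logic as ZSTD_maybeNullPtrAdd above: never form NULL + 0. */
static unsigned char* maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
{
    return add > 0 ? ptr + add : ptr;
}

int main(void)
{
    unsigned char buf[8] = {0};
    size_t produced = 0;            /* an empty output: dst may be NULL */
    unsigned char* dst = NULL;

    /* dst + produced would be NULL + 0: undefined behavior that UBSAN
     * flags. The guarded add returns NULL untouched instead. */
    unsigned char* end = maybeNullPtrAdd(dst, (ptrdiff_t)produced);
    printf("end is %s\n", end == NULL ? "NULL (safe)" : "non-NULL");

    /* With a real buffer it behaves like ordinary pointer addition. */
    printf("offset 3 ok: %p\n", (void*)maybeNullPtrAdd(buf, 3));
    return 0;
}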
*/
+#ifdef __MINGW32__
+#ifndef ZSTD_ASAN_DONT_POISON_WORKSPACE
+#define ZSTD_ASAN_DONT_POISON_WORKSPACE 1
+#endif
+#ifndef ZSTD_MSAN_DONT_POISON_WORKSPACE
+#define ZSTD_MSAN_DONT_POISON_WORKSPACE 1
+#endif
+#endif
+
+#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
+/* Not all platforms that support msan provide sanitizers/msan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+#include <stddef.h> /* size_t */
+#define ZSTD_DEPS_NEED_STDINT
+#include "zstd_deps.h" /* intptr_t */
+
+/* Make memory region fully initialized (without changing its contents). */
+void __msan_unpoison(const volatile void *a, size_t size);
+
+/* Make memory region fully uninitialized (without changing its contents).
+ This is a legacy interface that does not update origin information. Use
+ __msan_allocated_memory() instead. */
+void __msan_poison(const volatile void *a, size_t size);
+
+/* Returns the offset of the first (at least partially) poisoned byte in the
+ memory range, or -1 if the whole range is good. */
+intptr_t __msan_test_shadow(const volatile void *x, size_t size);
+
+/* Print shadow and origin for the memory range to stderr in a human-readable
+ format. */
+void __msan_print_shadow(const volatile void *x, size_t size);
+#endif
+
+#if ZSTD_ADDRESS_SANITIZER && !defined(ZSTD_ASAN_DONT_POISON_WORKSPACE)
+/* Not all platforms that support asan provide sanitizers/asan_interface.h.
+ * We therefore declare the functions we need ourselves, rather than trying to
+ * include the header file... */
+#include <stddef.h> /* size_t */
+
+/**
+ * Marks a memory region ([addr, addr+size)) as unaddressable.
+ *
+ * This memory must be previously allocated by your program. Instrumented
+ * code is forbidden from accessing addresses in this region until it is
+ * unpoisoned. This function is not guaranteed to poison the entire region -
+ * it could poison only a subregion of [addr, addr+size) due to ASan
+ * alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can poison or
+ * unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_poison_memory_region(void const volatile *addr, size_t size);
+
+/**
+ * Marks a memory region ([addr, addr+size)) as addressable.
+ *
+ * This memory must be previously allocated by your program. Accessing
+ * addresses in this region is allowed until this region is poisoned again.
+ * This function could unpoison a super-region of [addr, addr+size) due
+ * to ASan alignment restrictions.
+ *
+ * \note This function is not thread-safe because no two threads can
+ * poison or unpoison memory in the same memory region simultaneously.
+ *
+ * \param addr Start of memory region.
+ * \param size Size of memory region. */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
+#endif /* ZSTD_COMPILER_H */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/cpu.h b/c-blosc/internal-complibs/zstd-1.5.6/common/cpu.h
new file mode 100644
index 0000000..0e684d9
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/cpu.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+#include "mem.h"
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+typedef struct {
+ U32 f1c;
+ U32 f1d;
+ U32 f7b;
+ U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+ U32 f1c = 0;
+ U32 f1d = 0;
+ U32 f7b = 0;
+ U32 f7c = 0;
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#if !defined(__clang__)
+ int reg[4];
+ __cpuid((int*)reg, 0);
+ {
+ int const n = reg[0];
+ if (n >= 1) {
+ __cpuid((int*)reg, 1);
+ f1c = (U32)reg[2];
+ f1d = (U32)reg[3];
+ }
+ if (n >= 7) {
+ __cpuidex((int*)reg, 7, 0);
+ f7b = (U32)reg[1];
+ f7c = (U32)reg[2];
+ }
+ }
+#else
+ /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
+ * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
+ * to due to being a reserved register. So in that case, do the `cpuid`
+ * ourselves. Clang supports inline assembly anyway.
+ */
+ U32 n;
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "popq %%rbx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "rcx", "rdx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "popq %%rbx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1)
+ :);
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushq %%rbx\n\t"
+ "cpuid\n\t"
+ "movq %%rbx, %%rax\n\t"
+ "popq %%rbx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "rdx");
+ }
+#endif
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+ /* The following block like the normal cpuid branch below, but gcc
+ * reserves ebx for use of its pic register so we must specially
+ * handle the save and restore to avoid clobbering the register
+ */
+ U32 n;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(n)
+ : "a"(0)
+ : "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "popl %%ebx\n\t"
+ : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+ : "a"(1));
+ }
+ if (n >= 7) {
+ __asm__(
+ "pushl %%ebx\n\t"
+ "cpuid\n\t"
+ "movl %%ebx, %%eax\n\t"
+ "popl %%ebx"
+ : "=a"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+ U32 n;
+ __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+ if (n >= 1) {
+ U32 f1a;
+ __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+ }
+ if (n >= 7) {
+ U32 f7a;
+ __asm__("cpuid"
+ : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+ : "a"(7), "c"(0)
+ : "edx");
+ }
+#endif
+ {
+ ZSTD_cpuid_t cpuid;
+ cpuid.f1c = f1c;
+ cpuid.f1d = f1d;
+ cpuid.f7b = f7b;
+ cpuid.f7c = f7c;
+ return cpuid;
+ }
+}
+
+#define X(name, r, bit) \
+ MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \
+ return ((cpuid.r) & (1U << bit)) != 0; \
+ }
+
+/* cpuid(1): Processor Info and Feature Bits.
*/ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/debug.c b/c-blosc/internal-complibs/zstd-1.5.6/common/debug.c new file mode 100644 index 0000000..9d0b7d2 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/debug.c @@ -0,0 +1,30 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +#include "debug.h" + +#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2) +/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a + * translation unit is empty. So remove this from Linux kernel builds, but + * otherwise just leave it in. + */ +int g_debuglevel = DEBUGLEVEL; +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/debug.h b/c-blosc/internal-complibs/zstd-1.5.6/common/debug.h new file mode 100644 index 0000000..a16b69e --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/debug.h @@ -0,0 +1,116 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Meta Platforms, Inc. and affiliates. 
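The X/B/C/D macros above expand into one predicate per feature bit, e.g. ZSTD_cpuid_bmi2(). A sketch of the intended call pattern; it assumes compilation inside the zstd source tree (cpu.h pulls in mem.h), and the work_* dispatch functions are illustrative stubs:

/* Hypothetical translation unit using cpu.h above: run the CPUID probe once,
 * then branch to a BMI2 code path only when the bit is present. */
#include "cpu.h" /* ZSTD_cpuid, ZSTD_cpuid_bmi2 - as defined above */
#include <stdio.h>

static size_t work_generic(void) { return 0; } /* illustrative stubs */
static size_t work_bmi2(void)    { return 1; }

int main(void)
{
    ZSTD_cpuid_t const features = ZSTD_cpuid(); /* one-time, cheap probe */
    size_t const r = ZSTD_cpuid_bmi2(features) ? work_bmi2() : work_generic();
    printf("selected %s path\n", r ? "BMI2" : "generic");
    return 0;
}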
+ * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +# include "zstd_deps.h" +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +# include "zstd_deps.h" +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) \ + do { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } \ + } while (0) + +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) +#define LINE_AS_STRING TOSTRING(__LINE__) + +# define DEBUGLOG(l, ...) \ + do { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } \ + } while (0) +#else +# define RAWLOG(l, ...) do { } while (0) /* disabled */ +# define DEBUGLOG(l, ...) 
do { } while (0) /* disabled */
+#endif
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* DEBUG_H_12987983217 */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/entropy_common.c b/c-blosc/internal-complibs/zstd-1.5.6/common/entropy_common.c
new file mode 100644
index 0000000..e2173af
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/entropy_common.c
@@ -0,0 +1,340 @@
+/* ******************************************************************
+ * Common functions of New Generation Entropy library
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * You can contact the author at :
+ * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */
+
+/* *************************************
+* Dependencies
+***************************************/
+#include "mem.h"
+#include "error_private.h" /* ERR_*, ERROR */
+#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
+#include "fse.h"
+#include "huf.h"
+#include "bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros32 */
+
+
+/*=== Version ===*/
+unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; }
+
+
+/*=== Error Management ===*/
+unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
+
+
+/*-**************************************************************
+* FSE NCount encoding-decoding
+****************************************************************/
+FORCE_INLINE_TEMPLATE
+size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+ const void* headerBuffer, size_t hbSize)
+{
+ const BYTE* const istart = (const BYTE*) headerBuffer;
+ const BYTE* const iend = istart + hbSize;
+ const BYTE* ip = istart;
+ int nbBits;
+ int remaining;
+ int threshold;
+ U32 bitStream;
+ int bitCount;
+ unsigned charnum = 0;
+ unsigned const maxSV1 = *maxSVPtr + 1;
+ int previous0 = 0;
+
+ if (hbSize < 8) {
+ /* This function only works when hbSize >= 8 */
+ char buffer[8] = {0};
+ ZSTD_memcpy(buffer, headerBuffer, hbSize);
+ { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
+ buffer, sizeof(buffer));
+ if (FSE_isError(countSize)) return countSize;
+ if (countSize > hbSize) return ERROR(corruption_detected);
+ return countSize;
+ } }
+ assert(hbSize >= 8);
+
+ /* init */
+ ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
+ bitStream = MEM_readLE32(ip);
+ nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
+ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+ bitStream >>= 4;
+ bitCount = 4;
+ *tableLogPtr = nbBits;
+ remaining = (1<<nbBits)+1;
+ threshold = 1<<nbBits;
+ nbBits++;
+
+ for (;;) {
+ if (previous0) {
+ /* Count the number of repeats. Each time the
+ * 2-bit repeat code is 0b11 there is another
+ * repeat.
+ * Avoid UB by setting the high bit to 1.
+ */
+ int repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1;
+ while (repeats >= 12) {
+ charnum += 3 * 12;
+ if (LIKELY(ip <= iend-7)) {
+ ip += 3;
+ } else {
+ bitCount -= (int)(8 * (iend - 7 - ip));
+ bitCount &= 31;
+ ip = iend - 4;
+ }
+ bitStream = MEM_readLE32(ip)
>> bitCount; + repeats = ZSTD_countTrailingZeros32(~bitStream | 0x80000000) >> 1; + } + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = ZSTD_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
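The count decoder above reads a variable number of bits per symbol: raw values below `max` fit in nbBits-1 bits, the rest take nbBits. A worked standalone sketch of that threshold rule with concrete numbers; it mirrors the `max`/`threshold` arithmetic of FSE_readNCount_body but is not zstd code:

#include <stdio.h>

/* Decode one count given the low bits of the bitstream, following the same
 * rule as FSE_readNCount_body: with 'remaining' slots left and
 * threshold = 1<<(nbBits-1), small values fit in nbBits-1 bits. */
static int decode_count(unsigned bitStream, int threshold, int remaining,
                        int nbBits, int* bitsUsed)
{
    int const max = (2 * threshold - 1) - remaining;
    int count;
    if ((bitStream & (threshold - 1)) < (unsigned)max) {
        count = bitStream & (threshold - 1);   /* short code: nbBits-1 bits */
        *bitsUsed = nbBits - 1;
    } else {
        count = bitStream & (2 * threshold - 1);
        if (count >= threshold) count -= max;  /* long code: nbBits bits */
        *bitsUsed = nbBits;
    }
    return count - 1; /* stored with +1 offset, -1 encodes "less than 1" */
}

int main(void)
{
    /* e.g. threshold=32 (nbBits=6), 50 slots remaining: max = 63-50 = 13,
     * so raw values 0..12 are sent in 5 bits, everything else in 6. */
    int used;
    int const c = decode_count(0x07, 32, 50, 6, &used);
    printf("count=%d, bits used=%d\n", c, used); /* count=6, 5 bits */
    return 0;
}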
*/ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* flags */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n<oSize; n+=2) { + huffWeight[n] = ip[n/2] >> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n<oSize; n++) { + if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = ZSTD_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << ZSTD_highbit32(rest); + U32 const lastWeight = ZSTD_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int flags) +{ +#if DYNAMIC_BMI2 + if (flags & HUF_flags_bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)flags; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/error_private.c b/c-blosc/internal-complibs/zstd-1.5.6/common/error_private.c new file mode 100644 index 0000000..075fc5e --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/error_private.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved.
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +#include "error_private.h" + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Data corruption detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(literals_headerWrong): return "Header of Literals' block doesn't respect format specification"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_combination_unsupported): return "Unsupported combination of parameters"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(stabilityCondition_notRespected): return "pledged buffer stability condition is not respected"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + case PREFIX(noForwardProgress_destFull): return "Operation made no progress over multiple calls, due to output buffer being full"; + case PREFIX(noForwardProgress_inputEmpty): return "Operation made no progress over multiple calls, due to input being empty"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(sequenceProducer_failed): return "Block-level external sequence producer returned an error code"; + case PREFIX(externalSequences_invalid): return "External sequences 
are not valid"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/error_private.h b/c-blosc/internal-complibs/zstd-1.5.6/common/error_private.h new file mode 100644 index 0000000..0156010 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/error_private.h @@ -0,0 +1,168 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +#include "../zstd_errors.h" /* enum list */ +#include "compiler.h" +#include "debug.h" +#include "zstd_deps.h" /* size_t */ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + +/* check and forward error code */ +#define CHECK_V_F(e, f) \ + size_t const e = f; \ + do { \ + if (ERR_isError(e)) \ + return e; \ + } while (0) +#define CHECK_F(f) do { CHECK_V_F(_var_err__, f); } while (0) + + +/*-**************************************** +* Error Strings +******************************************/ + +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} + +/** + * Ignore: this is an internal helper. 
+ * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + do { \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } \ + } while (0) + +#define ERR_QUOTE(str) #str + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + do { \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } \ + } while (0) + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0) + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) \ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0) + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/fse.h b/c-blosc/internal-complibs/zstd-1.5.6/common/fse.h new file mode 100644 index 0000000..2ae128e --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/fse.h @@ -0,0 +1,640 @@ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +#include "zstd_deps.h" /* size_t, ptrdiff_t */ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). 
+ useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). 
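+ For illustration, the counting and normalization steps above chain like this (a sketch added for clarity, not upstream text; count[], srcSize and maxSymbolValue come from the counting step): + unsigned const tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue); + short norm[256]; + size_t const logOrError = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, 1); + if (FSE_isError(logOrError)) return logOrError; + Here useLowProbCount=1, the recommended default for inputs larger than about 2 KB.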
+ +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). 
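+ For instance (a sketch added for clarity, not upstream text; rBuffer and rBuffSize are the caller's inputs): + short ncount[256]; + unsigned maxSV = 255; + unsigned tableLog = 0; + size_t const hSize = FSE_readNCount(ncount, &maxSV, &tableLog, rBuffer, rBuffSize); + if (FSE_isError(hSize)) return hSize; + The FSE-compressed payload then starts hSize bytes into rBuffer, and maxSV/tableLog are ready for table construction.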
+ +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +#include "bitstream.h" + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + * See FSE_buildCTable_wksp() for breakdown of workspace usage. 
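+ * As an illustration (a sketch added for clarity, not upstream text), with maxSymbolValue 255 and + * tableLog 12 everything can be sized statically using the macros above and below: + * FSE_CTable ct[FSE_CTABLE_SIZE_U32(12, 255)]; + * unsigned wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(255, 12)]; + * size_t const e = FSE_buildCTable_wksp(ct, norm, 255, 12, wksp, sizeof(wksp)); + * where norm is the normalized counter produced by FSE_normalizeCount().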
+ */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + 1 + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)`. + * Set bmi2 to 1 if your CPU supports BMI2 or 0 if it doesn't */ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. +*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. 
+ size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. + BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). + unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. 
Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<<tableLog; + statePtr->stateTable = u16ptr+2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); + statePtr->stateLog = tableLog; +} + + +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, (size_t)statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional values get rounded up (i.e.
a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! 
FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. +* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG) +#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1) +#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2) +#define FSE_MIN_TABLELOG 5 + +#define FSE_TABLELOG_ABSOLUTE_MAX 15 +#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/fse_decompress.c b/c-blosc/internal-complibs/zstd-1.5.6/common/fse_decompress.c new file mode 100644 index 0000000..0dcc464 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/fse_decompress.c @@ -0,0 +1,313 @@ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses.
+****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +#include "debug.h" /* assert */ +#include "bitstream.h" +#include "compiler.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#include "error_private.h" +#include "zstd_deps.h" /* ZSTD_memcpy */ +#include "bits.h" /* ZSTD_highbit32 */ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s<maxSV1; s++) { + if (normalizedCounter[s]==-1) { + tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; + symbolNext[s] = 1; + } else { + if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write.
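+ * A small worked example of the spreading arithmetic (added for clarity, not upstream text): + * for tableLog 5, tableSize is 32 and FSE_TABLESTEP(32) = (32>>1)+(32>>3)+3 = 23. Because 23 is + * coprime with 32, repeatedly adding the step modulo tableSize visits every cell exactly once, + * which is why the spreading pass below can expect position to return to 0 at the end.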
+ */ + { U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s<maxSV1; ++s, sv += add) { + int i; + int const n = normalizedCounter[s]; + MEM_write64(spread + pos, sv); + for (i = 8; i < n; i += 8) { + MEM_write64(spread + pos + i, sv); + } + pos += (size_t)n; + } } + /* Now we spread those positions across the table. + * The benefit of doing it in two stages is that we avoid the + * variable size inner loop, which caused lots of branch misses. + * Now we can run through all the positions without any branch misses. + * We unroll the loop twice, since that is what empirically worked best. + */ + { + size_t position = 0; + size_t s; + size_t const unroll = 2; + assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ + for (s = 0; s < (size_t)tableSize; s += unroll) { + size_t u; + for (u = 0; u < unroll; ++u) { + size_t const uPosition = (position + (u * step)) & tableMask; + tableDecode[uPosition].symbol = spread[s + u]; + } + position = (position + (unroll * step)) & tableMask; + } + assert(position == 0); + } + } else { + U32 const tableMask = tableSize-1; + U32 const step = FSE_TABLESTEP(tableSize); + U32 s, position = 0; + for (s=0; s<maxSV1; s++) { + int i; + for (i=0; i<normalizedCounter[s]; i++) { + tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; + position = (position + step) & tableMask; + while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; u<tableSize; u++) { + FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); + U32 const nextState = symbolNext[symbol]++; + tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) ); + tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); + } } + + return 0; +} + +size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize); +} + +#ifndef FSE_COMMONDEFS_ONLY + +/*-******************************************************* +* Decompression (Byte symbols) +*********************************************************/ + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + BIT_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + + /* Init */ + CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) + + /* 4 symbols per loop */ + for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) { + op[0] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + assert(op >= ostart); + return (size_t)(op-ostart); +} + +typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; +} FSE_DecompressWksp; + + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; + size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable); + FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos; + + FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); + if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); + + /* correct offset to dtable depends on this property */ + FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0); + + /* normal FSE decoding mode */ + { size_t const NCountLength = + FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + assert(sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog) <= wkspSize); + workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function.
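+ As a caller-side illustration (a sketch added for clarity, not upstream code; dst, dstCapacity, cSrc and cSrcSize stand for the caller's buffers): + unsigned wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + size_t const dSize = FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp), 0); + if (FSE_isError(dSize)) return dSize; + Note that wkspSize is passed in bytes, and the trailing argument should be 1 only when the CPU supports BMI2.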
*/ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/huf.h b/c-blosc/internal-complibs/zstd-1.5.6/common/huf.h new file mode 100644 index 0000000..99bf85d --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/huf.h @@ -0,0 +1,286 @@ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +#include "zstd_deps.h" /* size_t */ +#include "mem.h" /* U32 */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */) +#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64)) + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 12 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +typedef size_t HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_ST(maxSymbolValue) ((maxSymbolValue)+2) /* Use tables of size_t, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ + +/** + * Huffman flags bitset. + * For all flags, 0 is the default value. + */ +typedef enum { + /** + * If compiled with DYNAMIC_BMI2: Set flag only if the CPU supports BMI2 at runtime. + * Otherwise: Ignored. + */ + HUF_flags_bmi2 = (1 << 0), + /** + * If set: Test possible table depths to find the one that produces the smallest header + encoded size. + * If unset: Use heuristic to find the table depth. + */ + HUF_flags_optimalDepth = (1 << 1), + /** + * If set: If the previous table can encode the input, always reuse the previous table. + * If unset: If the previous table can encode the input, reuse the previous table if it results in a smaller output. + */ + HUF_flags_preferRepeat = (1 << 2), + /** + * If set: Sample the input and check if the sample is uncompressible, if it is then don't attempt to compress. + * If unset: Always histogram the entire input. + */ + HUF_flags_suspectUncompressible = (1 << 3), + /** + * If set: Don't use assembly implementations + * If unset: Allow using assembly implementations + */ + HUF_flags_disableAsm = (1 << 4), + /** + * If set: Don't use the fast decoding loop, always use the fallback decoding loop. + * If unset: Use the fast decoding loop when possible. + */ + HUF_flags_disableFast = (1 << 5) +} HUF_flags_e; + + +/* **************************************** + * HUF detailed API + * ****************************************/ +#define HUF_OPTIMAL_DEPTH_THRESHOLD ZSTD_btultra + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. 
+ * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_minTableLog(unsigned symbolCardinality); +unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue); +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, void* workSpace, + size_t wkspSize, HUF_CElt* table, const unsigned* count, int flags); /* table is used as scratch space for building and testing tables, not a return value */ +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; + +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. + * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 ((4 * (HUF_SYMBOLVALUE_MAX + 1)) + 192) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
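+ * In this version the BMI2 choice is carried by the flags parameter (HUF_flags_bmi2) rather than a + * dedicated bmi2 argument. A call might look like this (a sketch added for clarity, not upstream text; + * src/srcSize stand for the caller's input, and weights/ranks/nbSymbols/tableLog are as in HUF_readStats() above): + * U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + * size_t const r = HUF_readStats_wksp(weights, sizeof(weights), ranks, &nbSymbols, &tableLog, src, srcSize, wksp, sizeof(wksp), 0); + * where the final 0 selects the default flags.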
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int flags); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/** HUF_getNbBitsFromCTable() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0 + * Note 2 : is not inlined, as HUF_CElt definition is private + */ +U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue); + +typedef struct { + BYTE tableLog; + BYTE maxSymbolValue; + BYTE unused[sizeof(size_t) - 2]; +} HUF_CTableHeader; + +/** HUF_readCTableHeader() : + * @returns The header from the CTable specifying the tableLog and the maxSymbolValue. + */ +HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. + */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
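 * (preferRepeat corresponds to the HUF_flags_preferRepeat bit of `flags`.)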
+ * If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int flags); + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); /**< double-symbols decoder */ +#endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); +#endif +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags); +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags); +#endif + +#endif /* HUF_H_298734234 */ + +#if defined (__cplusplus) +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/mem.h b/c-blosc/internal-complibs/zstd-1.5.6/common/mem.h new file mode 100644 index 0000000..096f4be --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/mem.h @@ -0,0 +1,426 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/*-****************************************
+*  Dependencies
+******************************************/
+#include <stddef.h>     /* size_t, ptrdiff_t */
+#include "compiler.h"   /* __has_builtin */
+#include "debug.h"      /* DEBUG_STATIC_ASSERT */
+#include "zstd_deps.h"  /* ZSTD_memcpy */
+
+
+/*-****************************************
+*  Compiler specifics
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+
+/*-**************************************************************
+*  Basic Types
+*****************************************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h>   /* intptr_t */
+# endif
+  typedef  uint8_t BYTE;
+  typedef  uint8_t U8;
+  typedef   int8_t S8;
+  typedef uint16_t U16;
+  typedef  int16_t S16;
+  typedef uint32_t U32;
+  typedef  int32_t S32;
+  typedef uint64_t U64;
+  typedef  int64_t S64;
+#else
+# include <limits.h>
+#if CHAR_BIT != 8
+#  error "this implementation requires char to be exactly 8-bit type"
+#endif
+  typedef unsigned char   BYTE;
+  typedef unsigned char   U8;
+  typedef   signed char   S8;
+#if USHRT_MAX != 65535
+#  error "this implementation requires short to be exactly 16-bit type"
+#endif
+  typedef unsigned short  U16;
+  typedef   signed short  S16;
+#if UINT_MAX != 4294967295
+#  error "this implementation requires int to be exactly 32-bit type"
+#endif
+  typedef unsigned int    U32;
+  typedef   signed int    S32;
+/* note : there are no limits defined for long long type in C90.
+ * limits exist in C99, however, in such case, <stdint.h> is preferred */
+  typedef unsigned long long U64;
+  typedef   signed long long S64;
+#endif
+
+
+/*-**************************************************************
+*  Memory I/O API
+*****************************************************************/
+/*=== Static platform detection ===*/
+MEM_STATIC unsigned MEM_32bits(void);
+MEM_STATIC unsigned MEM_64bits(void);
+MEM_STATIC unsigned MEM_isLittleEndian(void);
+
+/*=== Native unaligned read/write ===*/
+MEM_STATIC U16 MEM_read16(const void* memPtr);
+MEM_STATIC U32 MEM_read32(const void* memPtr);
+MEM_STATIC U64 MEM_read64(const void* memPtr);
+MEM_STATIC size_t MEM_readST(const void* memPtr);
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value);
+MEM_STATIC void MEM_write32(void* memPtr, U32 value);
+MEM_STATIC void MEM_write64(void* memPtr, U64 value);
+
+/*=== Little endian unaligned read/write ===*/
+MEM_STATIC U16 MEM_readLE16(const void* memPtr);
+MEM_STATIC U32 MEM_readLE24(const void* memPtr);
+MEM_STATIC U32 MEM_readLE32(const void* memPtr);
+MEM_STATIC U64 MEM_readLE64(const void* memPtr);
+MEM_STATIC size_t MEM_readLEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val);
+MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val);
+MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val);
+
+/*=== Big endian unaligned read/write ===*/
+MEM_STATIC U32 MEM_readBE32(const void* memPtr);
+MEM_STATIC U64 MEM_readBE64(const void* memPtr);
+MEM_STATIC size_t MEM_readBEST(const void* memPtr);
+
+MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32);
+MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64);
+MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val);
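+
+/* Illustration (not part of the API) : the little-endian helpers above give
+ * endian-independent decoding of unaligned data. Buffer contents and names
+ * in this sketch are assumptions, not requirements :
+ *     BYTE const hdr[4] = { 0x04, 0x03, 0x02, 0x01 };
+ *     U32 const  len = MEM_readLE32(hdr);    yields 0x01020304 on any platform
+ */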
+
+/*=== Byteswap ===*/
+MEM_STATIC U32 MEM_swap32(U32 in);
+MEM_STATIC U64 MEM_swap64(U64 in);
+MEM_STATIC size_t MEM_swapST(size_t in);
+
+
+/*-**************************************************************
+*  Memory I/O Implementation
+*****************************************************************/
+/* MEM_FORCE_MEMORY_ACCESS : For accessing unaligned memory:
+ * Method 0 : always use `memcpy()`. Safe and portable.
+ * Method 1 : Use compiler extension to set unaligned access.
+ * Method 2 : direct access. This method is portable but violates the C standard.
+ *            It can generate buggy code on targets depending on alignment.
+ * Default  : method 1 if supported, else method 0
+ */
+#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
+# ifdef __GNUC__
+#   define MEM_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+    return 1;
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+    return 0;
+#elif defined(__clang__) && __LITTLE_ENDIAN__
+    return 1;
+#elif defined(__clang__) && __BIG_ENDIAN__
+    return 0;
+#elif defined(_MSC_VER) && (_M_AMD64 || _M_IX86)
+    return 1;
+#elif defined(__DMC__) && defined(_M_IX86)
+    return 1;
+#else
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+#endif
+}
+
+#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
+
+/* violates C standard, by lying on structure alignment.
+Only use if no other choice to achieve best performance on target platform */
+MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
+MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
+MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
+MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
+
+#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
+
+typedef __attribute__((aligned(1))) U16 unalign16;
+typedef __attribute__((aligned(1))) U32 unalign32;
+typedef __attribute__((aligned(1))) U64 unalign64;
+typedef __attribute__((aligned(1))) size_t unalignArch;
+
+MEM_STATIC U16 MEM_read16(const void* ptr) { return *(const unalign16*)ptr; }
+MEM_STATIC U32 MEM_read32(const void* ptr) { return *(const unalign32*)ptr; }
+MEM_STATIC U64 MEM_read64(const void* ptr) { return *(const unalign64*)ptr; }
+MEM_STATIC size_t MEM_readST(const void* ptr) { return *(const unalignArch*)ptr; }
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(unalign16*)memPtr = value; }
+MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(unalign32*)memPtr = value; }
+MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(unalign64*)memPtr = value; }
+
+#else
+
+/* default method, safe and standard.
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32_fallback(U32 in) +{ + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +} + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#else + return MEM_swap32_fallback(in); +#endif +} + +MEM_STATIC U64 MEM_swap64_fallback(U64 in) +{ + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return MEM_swap64_fallback(in); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + 
MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/pool.c b/c-blosc/internal-complibs/zstd-1.5.6/common/pool.c new file mode 100644 index 0000000..3adcefc --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/pool.c @@ -0,0 +1,371 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* ====== Dependencies ======= */ +#include "../common/allocations.h" /* ZSTD_customCalloc, ZSTD_customFree */ +#include "zstd_deps.h" /* size_t */ +#include "debug.h" /* assert */ +#include "pool.h" + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +#include "threading.h" /* pthread adaptation */ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. + */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = (ctx->queueHead == ctx->queueTail); + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +/* ZSTD_createThreadPool() : public access point */ +POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { + return POOL_create (numThreads, 0); +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) +{ + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job queue. 
+ * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_customCalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + { + int error = 0; + error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + if (error) { POOL_free(ctx); return NULL; } + } + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. +*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i]); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_customFree(ctx->queue, ctx->customMem); + ZSTD_customFree(ctx->threads, ctx->customMem); + ZSTD_customFree(ctx, ctx->customMem); +} + +/*! POOL_joinJobs() : + * Waits for all queued jobs to finish executing. 
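 * A usage sketch (thread count, queue size and the task names are
 * assumptions of this example, not recommendations):
 *     POOL_ctx* ctx = POOL_create(4, 8);
 *     POOL_add(ctx, &myTask, &myTaskArg);    enqueue; may block when full
 *     POOL_joinJobs(ctx);                    barrier: queue drained, workers idle
 *     POOL_free(ctx);                        shuts down and joins all threads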
+ */ +void POOL_joinJobs(POOL_ctx* ctx) { + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + while(!ctx->queueEmpty || ctx->numThreadsBusy > 0) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + +void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { + POOL_free (pool); +} + +size_t POOL_sizeof(const POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t)); + ZSTD_customFree(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. 
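 * For illustration (numbers assumed): a pool made by POOL_create(2, 0) has
 * ctx->queueSize == 1, so the queue reports full whenever both workers are
 * busy or a job is already waiting, and POOL_add() blocks until a worker
 * picks that job up.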
+ */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void +POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job job; + job.function = function; + job.opaque = opaque; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_poolCtx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* +POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) +{ + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_poolCtx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_poolCtx); + (void)ctx; +} + +void POOL_joinJobs(POOL_ctx* ctx){ + assert(!ctx || ctx == &g_poolCtx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(const POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_poolCtx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/pool.h b/c-blosc/internal-complibs/zstd-1.5.6/common/pool.h new file mode 100644 index 0000000..cca4de7 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/pool.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +#include "zstd_deps.h" +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +#include "../zstd.h" + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + + +/*! POOL_joinJobs() : + * Waits for all queued jobs to finish executing. + */ +void POOL_joinJobs(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and reuse existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(const POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. + */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a queue slot is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. + */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/portability_macros.h b/c-blosc/internal-complibs/zstd-1.5.6/common/portability_macros.h new file mode 100644 index 0000000..e50314a --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/portability_macros.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_PORTABILITY_MACROS_H +#define ZSTD_PORTABILITY_MACROS_H + +/** + * This header file contains macro definitions to support portability. + * This header is shared between C and ASM code, so it MUST only + * contain macro definitions. It MUST not contain any C code. + * + * This header ONLY defines macros to detect platforms/feature support. + * + */ + + +/* compat. with non-clang compilers */ +#ifndef __has_attribute + #define __has_attribute(x) 0 +#endif + +/* compat. 
with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER +# if __has_feature(memory_sanitizer) +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 +# endif +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER +# if __has_feature(address_sanitizer) +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 +# endif +#endif + +/* detects whether we are being compiled under dfsan */ +#ifndef ZSTD_DATAFLOW_SANITIZER +# if __has_feature(dataflow_sanitizer) +# define ZSTD_DATAFLOW_SANITIZER 1 +# else +# define ZSTD_DATAFLOW_SANITIZER 0 +# endif +#endif + +/* Mark the internal assembly functions as hidden */ +#ifdef __ELF__ +# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func +#elif defined(__APPLE__) +# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func +#else +# define ZSTD_HIDE_ASM_FUNCTION(func) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. + */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X64)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/** + * Only enable assembly for GNUC compatible compilers, + * because other platforms may not support GAS assembly syntax. + * + * Only enable assembly for Linux / MacOS, other platforms may + * work, but they haven't been tested. This could likely be + * extended to BSD systems. + * + * Disable assembly when MSAN is enabled, because MSAN requires + * 100% of code to be instrumented to work. + */ +#if defined(__GNUC__) +# if defined(__linux__) || defined(__linux) || defined(__APPLE__) +# if ZSTD_MEMORY_SANITIZER +# define ZSTD_ASM_SUPPORTED 0 +# elif ZSTD_DATAFLOW_SANITIZER +# define ZSTD_ASM_SUPPORTED 0 +# else +# define ZSTD_ASM_SUPPORTED 1 +# endif +# else +# define ZSTD_ASM_SUPPORTED 0 +# endif +#else +# define ZSTD_ASM_SUPPORTED 0 +#endif + +/** + * Determines whether we should enable assembly for x86-64 + * with BMI2. + * + * Enable if all of the following conditions hold: + * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM + * - Assembly is supported + * - We are compiling for x86-64 and either: + * - DYNAMIC_BMI2 is enabled + * - BMI2 is supported at compile time + */ +#if !defined(ZSTD_DISABLE_ASM) && \ + ZSTD_ASM_SUPPORTED && \ + defined(__x86_64__) && \ + (DYNAMIC_BMI2 || defined(__BMI2__)) +# define ZSTD_ENABLE_ASM_X86_64_BMI2 1 +#else +# define ZSTD_ENABLE_ASM_X86_64_BMI2 0 +#endif + +/* + * For x86 ELF targets, add .note.gnu.property section for Intel CET in + * assembly sources when CET is enabled. + * + * Additionally, any function that may be called indirectly must begin + * with ZSTD_CET_ENDBRANCH. 
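 * (For illustration: ZSTD_CET_ENDBRANCH is placed as the first statement of
 * such a function in the assembly sources; per the block below it expands to
 * _CET_ENDBR from <cet.h>, typically an ENDBR instruction, when CET support
 * is detected, and to nothing otherwise.)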
+ */
+#if defined(__ELF__) && (defined(__x86_64__) || defined(__i386__)) \
+    && defined(__has_include)
+# if __has_include(<cet.h>)
+#  include <cet.h>
+#  define ZSTD_CET_ENDBRANCH _CET_ENDBR
+# endif
+#endif
+
+#ifndef ZSTD_CET_ENDBRANCH
+# define ZSTD_CET_ENDBRANCH
+#endif
+
+#endif /* ZSTD_PORTABILITY_MACROS_H */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/threading.c b/c-blosc/internal-complibs/zstd-1.5.6/common/threading.c
new file mode 100644
index 0000000..25bb8b9
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/threading.c
@@ -0,0 +1,182 @@
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**
+ * This file holds wrappers for systems which do not support pthreads
+ */
+
+#include "threading.h"
+
+/* create fake symbol to avoid empty translation unit warning */
+int g_ZSTD_threading_useless_symbol;
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper
+ */
+
+
+/* ===  Dependencies  === */
+#include <process.h>   /* _beginthreadex */
+#include <errno.h>     /* errno */
+
+
+/* ===  Implementation  === */
+
+typedef struct {
+    void* (*start_routine)(void*);
+    void* arg;
+    int initialized;
+    ZSTD_pthread_cond_t initialized_cond;
+    ZSTD_pthread_mutex_t initialized_mutex;
+} ZSTD_thread_params_t;
+
+static unsigned __stdcall worker(void *arg)
+{
+    void* (*start_routine)(void*);
+    void* thread_arg;
+
+    /* Initialize thread_arg and start_routine and signal main thread that we don't need it
+     * to wait any longer.
+ */ + { + ZSTD_thread_params_t* thread_param = (ZSTD_thread_params_t*)arg; + thread_arg = thread_param->arg; + start_routine = thread_param->start_routine; + + /* Signal main thread that we are running and do not depend on its memory anymore */ + ZSTD_pthread_mutex_lock(&thread_param->initialized_mutex); + thread_param->initialized = 1; + ZSTD_pthread_cond_signal(&thread_param->initialized_cond); + ZSTD_pthread_mutex_unlock(&thread_param->initialized_mutex); + } + + start_routine(thread_arg); + + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + ZSTD_thread_params_t thread_param; + (void)unused; + + if (thread==NULL) return -1; + *thread = NULL; + + thread_param.start_routine = start_routine; + thread_param.arg = arg; + thread_param.initialized = 0; + + /* Setup thread initialization synchronization */ + if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) { + /* Should never happen on Windows */ + return -1; + } + if(ZSTD_pthread_mutex_init(&thread_param.initialized_mutex, NULL)) { + /* Should never happen on Windows */ + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + return -1; + } + + /* Spawn thread */ + *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL); + if (*thread==NULL) { + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + return errno; + } + + /* Wait for thread to be initialized */ + ZSTD_pthread_mutex_lock(&thread_param.initialized_mutex); + while(!thread_param.initialized) { + ZSTD_pthread_cond_wait(&thread_param.initialized_cond, &thread_param.initialized_mutex); + } + ZSTD_pthread_mutex_unlock(&thread_param.initialized_mutex); + ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex); + ZSTD_pthread_cond_destroy(&thread_param.initialized_cond); + + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread) +{ + DWORD result; + + if (!thread) return 0; + + result = WaitForSingleObject(thread, INFINITE); + CloseHandle(thread); + + switch (result) { + case WAIT_OBJECT_0: + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ + +#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) + +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" + +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) +{ + assert(mutex != NULL); + *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); + if (!*mutex) + return 1; + return pthread_mutex_init(*mutex, attr); +} + +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) +{ + assert(mutex != NULL); + if (!*mutex) + return 0; + { + int const ret = pthread_mutex_destroy(*mutex); + ZSTD_free(*mutex); + return ret; + } +} + +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) +{ + assert(cond != NULL); + *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); + if (!*cond) + return 1; + return pthread_cond_init(*cond, attr); +} + +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) +{ + assert(cond != NULL); + if (!*cond) + return 0; + { + int const ret = pthread_cond_destroy(*cond); + ZSTD_free(*cond); + return ret; + } +} + +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/threading.h b/c-blosc/internal-complibs/zstd-1.5.6/common/threading.h new file mode 100644 index 0000000..fb5c1c8 --- /dev/null +++ 
b/c-blosc/internal-complibs/zstd-1.5.6/common/threading.h
@@ -0,0 +1,150 @@
+/**
+ * Copyright (c) 2016 Tino Reichardt
+ * All rights reserved.
+ *
+ * You can contact the author at:
+ * - zstdmt source repository: https://github.com/mcmilk/zstdmt
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef THREADING_H_938743
+#define THREADING_H_938743
+
+#include "debug.h"
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
+
+/**
+ * Windows minimalist Pthread Wrapper
+ */
+#ifdef WINVER
+# undef WINVER
+#endif
+#define WINVER       0x0600
+
+#ifdef _WIN32_WINNT
+# undef _WIN32_WINNT
+#endif
+#define _WIN32_WINNT 0x0600
+
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+
+#undef ERROR   /* reported already defined on VS 2015 (Rich Geldreich) */
+#include <windows.h>
+#undef ERROR
+#define ERROR(name) ZSTD_ERROR(name)
+
+
+/* mutex */
+#define ZSTD_pthread_mutex_t           CRITICAL_SECTION
+#define ZSTD_pthread_mutex_init(a, b)  ((void)(b), InitializeCriticalSection((a)), 0)
+#define ZSTD_pthread_mutex_destroy(a)  DeleteCriticalSection((a))
+#define ZSTD_pthread_mutex_lock(a)     EnterCriticalSection((a))
+#define ZSTD_pthread_mutex_unlock(a)   LeaveCriticalSection((a))
+
+/* condition variable */
+#define ZSTD_pthread_cond_t             CONDITION_VARIABLE
+#define ZSTD_pthread_cond_init(a, b)    ((void)(b), InitializeConditionVariable((a)), 0)
+#define ZSTD_pthread_cond_destroy(a)    ((void)(a))
+#define ZSTD_pthread_cond_wait(a, b)    SleepConditionVariableCS((a), (b), INFINITE)
+#define ZSTD_pthread_cond_signal(a)     WakeConditionVariable((a))
+#define ZSTD_pthread_cond_broadcast(a)  WakeAllConditionVariable((a))
+
+/* ZSTD_pthread_create() and ZSTD_pthread_join() */
+typedef HANDLE ZSTD_pthread_t;
+
+int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
+                   void* (*start_routine) (void*), void* arg);
+
+int ZSTD_pthread_join(ZSTD_pthread_t thread);
+
+/**
+ * add here more wrappers as required
+ */
+
+
+#elif defined(ZSTD_MULTITHREAD)   /* posix assumed ; need a better detection method */
+/* ===   POSIX Systems   === */
+#  include <pthread.h>
+
+#if DEBUGLEVEL < 1
+
+#define ZSTD_pthread_mutex_t            pthread_mutex_t
+#define ZSTD_pthread_mutex_init(a, b)   pthread_mutex_init((a), (b))
+#define ZSTD_pthread_mutex_destroy(a)   pthread_mutex_destroy((a))
+#define ZSTD_pthread_mutex_lock(a)      pthread_mutex_lock((a))
+#define ZSTD_pthread_mutex_unlock(a)    pthread_mutex_unlock((a))
+
+#define ZSTD_pthread_cond_t             pthread_cond_t
+#define ZSTD_pthread_cond_init(a, b)    pthread_cond_init((a), (b))
+#define ZSTD_pthread_cond_destroy(a)    pthread_cond_destroy((a))
+#define ZSTD_pthread_cond_wait(a, b)    pthread_cond_wait((a), (b))
+#define ZSTD_pthread_cond_signal(a)     pthread_cond_signal((a))
+#define ZSTD_pthread_cond_broadcast(a)  pthread_cond_broadcast((a))
+
+#define ZSTD_pthread_t                  pthread_t
+#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d))
+#define ZSTD_pthread_join(a)            pthread_join((a),NULL)
+
+#else /* DEBUGLEVEL >= 1 */
+
+/* Debug implementation of threading.
+ * In this implementation we use pointers for mutexes and condition variables.
+ * This way, if we forget to init/destroy them the program will crash or ASAN
+ * will report leaks.
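 * (Concretely, per the block below: ZSTD_pthread_mutex_t becomes a
 * pthread_mutex_t* that ZSTD_pthread_mutex_init() allocates and
 * ZSTD_pthread_mutex_destroy() frees, while the release build maps the
 * wrappers directly onto the native pthread types.)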
+ */ + +#define ZSTD_pthread_mutex_t pthread_mutex_t* +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) + +#define ZSTD_pthread_cond_t pthread_cond_t* +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a) pthread_join((a),NULL) + +#endif + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.c b/c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.c new file mode 100644 index 0000000..052cd52 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.c @@ -0,0 +1,18 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Copyright (c) Yann Collet - Meta Platforms, Inc + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * xxhash.c instantiates functions defined in xxhash.h + */ + +#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */ +#define XXH_IMPLEMENTATION /* access definitions */ + +#include "xxhash.h" diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.h b/c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.h new file mode 100644 index 0000000..e59e442 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/common/xxhash.h @@ -0,0 +1,7020 @@ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) Yann Collet - Meta Platforms, Inc + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Local adaptations for Zstandard */ + +#ifndef XXH_NO_XXH3 +# define XXH_NO_XXH3 +#endif + +#ifndef XXH_NAMESPACE +# define XXH_NAMESPACE ZSTD_ +#endif + +/*! + * @mainpage xxHash + * + * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed + * limits. 
+ * + * It is proposed in four flavors, in three families: + * 1. @ref XXH32_family + * - Classic 32-bit hash function. Simple, compact, and runs on almost all + * 32-bit and 64-bit systems. + * 2. @ref XXH64_family + * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most + * 64-bit systems (but _not_ 32-bit systems). + * 3. @ref XXH3_family + * - Modern 64-bit and 128-bit hash function family which features improved + * strength and performance across the board, especially on smaller data. + * It benefits greatly from SIMD and 64-bit without requiring it. + * + * Benchmarks + * --- + * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04. + * The open source benchmark program is compiled with clang v10.0 using -O3 flag. + * + * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity | + * | -------------------- | ------- | ----: | ---------------: | ------------------: | + * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 | + * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 | + * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 | + * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 | + * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 | + * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 | + * | RAM sequential read | | N/A | 28.0 GB/s | N/A | + * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 | + * | City64 | | 64 | 22.0 GB/s | 76.6 | + * | T1ha2 | | 64 | 22.0 GB/s | 99.0 | + * | City128 | | 128 | 21.7 GB/s | 57.7 | + * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 | + * | XXH64() | | 64 | 19.4 GB/s | 71.0 | + * | SpookyHash | | 64 | 19.3 GB/s | 53.2 | + * | Mum | | 64 | 18.0 GB/s | 67.0 | + * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 | + * | XXH32() | | 32 | 9.7 GB/s | 71.9 | + * | City32 | | 32 | 9.1 GB/s | 66.0 | + * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 | + * | Murmur3 | | 32 | 3.9 GB/s | 56.1 | + * | SipHash* | | 64 | 3.0 GB/s | 43.2 | + * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 | + * | HighwayHash | | 64 | 1.4 GB/s | 6.0 | + * | FNV64 | | 64 | 1.2 GB/s | 62.7 | + * | Blake2* | | 256 | 1.1 GB/s | 5.1 | + * | SHA1* | | 160 | 0.8 GB/s | 5.6 | + * | MD5* | | 128 | 0.6 GB/s | 7.8 | + * @note + * - Hashes which require a specific ISA extension are noted. SSE2 is also noted, + * even though it is mandatory on x64. + * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic + * by modern standards. + * - Small data velocity is a rough average of algorithm's efficiency for small + * data. For more accurate information, see the wiki. + * - More benchmarks and strength tests are found on the wiki: + * https://github.com/Cyan4973/xxHash/wiki + * + * Usage + * ------ + * All xxHash variants use a similar API. Changing the algorithm is a trivial + * substitution. + * + * @pre + * For functions which take an input and length parameter, the following + * requirements are assumed: + * - The range from [`input`, `input + length`) is valid, readable memory. + * - The only exception is if the `length` is `0`, `input` may be `NULL`. + * - For C++, the objects must have the *TriviallyCopyable* property, as the + * functions access bytes directly as if it was an array of `unsigned char`. + * + * @anchor single_shot_example + * **Single Shot** + * + * These functions are stateless functions which hash a contiguous block of memory, + * immediately returning the result. They are the easiest and usually the fastest + * option. 
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ * #include <string.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which hashes a null terminated string with XXH32().
+ * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ * {
+ *     // NULL pointers are only valid if the length is zero
+ *     size_t length = (string == NULL) ? 0 : strlen(string);
+ *     return XXH32(string, length, seed);
+ * }
+ * @endcode
+ *
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <assert.h>
+ * #include "xxhash.h"
+ * // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ * XXH64_hash_t hashFile(FILE* f)
+ * {
+ *     // Allocate a state struct. Do not just use malloc() or new.
+ *     XXH3_state_t* state = XXH3_createState();
+ *     assert(state != NULL && "Out of memory!");
+ *     // Reset the state to start a new hashing session.
+ *     XXH3_64bits_reset(state);
+ *     char buffer[4096];
+ *     size_t count;
+ *     // Read the file in chunks
+ *     while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
+ *         // Run update() as many times as necessary to process the data
+ *         XXH3_64bits_update(state, buffer, count);
+ *     }
+ *     // Retrieve the finalized hash. This will not change the state.
+ *     XXH64_hash_t result = XXH3_64bits_digest(state);
+ *     // Free the state. Do not use free().
+ *     XXH3_freeState(state);
+ *     return result;
+ * }
+ * @endcode
+ *
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ *
+ * @anchor canonical_representation_example
+ * **Canonical Representation**
+ *
+ * The default return values from XXH functions are unsigned 32, 64 and 128 bit
+ * integers.
+ * This is the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ *
+ * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
+ * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
+ * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
+ *
+ * @code{.c}
+ * #include <stdio.h>
+ * #include "xxhash.h"
+ *
+ * // Example for a function which prints XXH32_hash_t in human readable format
+ * void printXxh32(XXH32_hash_t hash)
+ * {
+ *     XXH32_canonical_t cano;
+ *     XXH32_canonicalFromHash(&cano, hash);
+ *     size_t i;
+ *     for(i = 0; i < sizeof(cano.digest); ++i) {
+ *         printf("%02x", cano.digest[i]);
+ *     }
+ *     printf("\n");
+ * }
+ *
+ * // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
+ * XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
+ * {
+ *     XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
+ *     return hash;
+ * }
+ * @endcode
+ *
+ *
+ * @file xxhash.h
+ * xxHash prototypes and implementation
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* ****************************
+ *  INLINE mode
+ ******************************/
+/*!
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Gives access to internal state declaration, required for static allocation.
+ *
+ * Incompatible with dynamic linking, due to risks of ABI changes.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #include "xxhash.h"
+ * @endcode
+ */
+# define XXH_STATIC_LINKING_ONLY
+/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
+
+/*!
+ * @brief Gives access to internal definitions.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_STATIC_LINKING_ONLY
+ * #define XXH_IMPLEMENTATION
+ * #include "xxhash.h"
+ * @endcode
+ */
+# define XXH_IMPLEMENTATION
+/* Do not undef XXH_IMPLEMENTATION for Doxygen */
+
+/*!
+ * @brief Exposes the implementation and marks all functions as `inline`.
+ *
+ * Use these build macros to inline xxhash into the target unit.
+ * Inlining improves performance on small inputs, especially when the length is
+ * expressed as a compile-time constant:
+ *
+ * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *
+ * It also keeps xxHash symbols private to the unit, so they are not exported.
+ *
+ * Usage:
+ * @code{.c}
+ * #define XXH_INLINE_ALL
+ * #include "xxhash.h"
+ * @endcode
+ * Do not compile and link xxhash.o as a separate object, as it is not useful.
+ */
+# define XXH_INLINE_ALL
+# undef XXH_INLINE_ALL
+/*!
+ * @brief Exposes the implementation without marking functions as inline.
+ */
+# define XXH_PRIVATE_API
+# undef XXH_PRIVATE_API
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
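+ *
+ * A build-integration sketch (the MYLIB_ prefix is an assumption of this
+ * example):
+ * @code{.c}
+ * #define XXH_NAMESPACE MYLIB_
+ * #include "xxhash.h"
+ * // Source code keeps calling XXH64(), but the linker sees MYLIB_XXH64().
+ * XXH64_hash_t h = XXH64(buffer, size, 0);
+ * @endcode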
+ */ +# define XXH_NAMESPACE /* YOUR NAME HERE */ +# undef XXH_NAMESPACE +#endif + +#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \ + && !defined(XXH_INLINE_ALL_31684351384) + /* this section should be traversed only once */ +# define XXH_INLINE_ALL_31684351384 + /* give access to the advanced API, required to compile implementations */ +# undef XXH_STATIC_LINKING_ONLY /* avoid macro redef */ +# define XXH_STATIC_LINKING_ONLY + /* make all functions private */ +# undef XXH_PUBLIC_API +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else + /* note: this version may generate warnings for unused static functions */ +# define XXH_PUBLIC_API static +# endif + + /* + * This part deals with the special case where a unit wants to inline xxHash, + * but "xxhash.h" has previously been included without XXH_INLINE_ALL, + * such as part of some previously included *.h header file. + * Without further action, the new include would just be ignored, + * and functions would effectively _not_ be inlined (silent failure). + * The following macros solve this situation by prefixing all inlined names, + * avoiding naming collision with previous inclusions. + */ + /* Before that, we unconditionally #undef all symbols, + * in case they were already defined with XXH_NAMESPACE. + * They will then be redefined for XXH_INLINE_ALL + */ +# undef XXH_versionNumber + /* XXH32 */ +# undef XXH32 +# undef XXH32_createState +# undef XXH32_freeState +# undef XXH32_reset +# undef XXH32_update +# undef XXH32_digest +# undef XXH32_copyState +# undef XXH32_canonicalFromHash +# undef XXH32_hashFromCanonical + /* XXH64 */ +# undef XXH64 +# undef XXH64_createState +# undef XXH64_freeState +# undef XXH64_reset +# undef XXH64_update +# undef XXH64_digest +# undef XXH64_copyState +# undef XXH64_canonicalFromHash +# undef XXH64_hashFromCanonical + /* XXH3_64bits */ +# undef XXH3_64bits +# undef XXH3_64bits_withSecret +# undef XXH3_64bits_withSeed +# undef XXH3_64bits_withSecretandSeed +# undef XXH3_createState +# undef XXH3_freeState +# undef XXH3_copyState +# undef XXH3_64bits_reset +# undef XXH3_64bits_reset_withSeed +# undef XXH3_64bits_reset_withSecret +# undef XXH3_64bits_update +# undef XXH3_64bits_digest +# undef XXH3_generateSecret + /* XXH3_128bits */ +# undef XXH128 +# undef XXH3_128bits +# undef XXH3_128bits_withSeed +# undef XXH3_128bits_withSecret +# undef XXH3_128bits_reset +# undef XXH3_128bits_reset_withSeed +# undef XXH3_128bits_reset_withSecret +# undef XXH3_128bits_reset_withSecretandSeed +# undef XXH3_128bits_update +# undef XXH3_128bits_digest +# undef XXH128_isEqual +# undef XXH128_cmp +# undef XXH128_canonicalFromHash +# undef XXH128_hashFromCanonical + /* Finally, free the namespace itself */ +# undef XXH_NAMESPACE + + /* employ the namespace for XXH_INLINE_ALL */ +# define XXH_NAMESPACE XXH_INLINE_ + /* + * Some identifiers (enums, type names) are not symbols, + * but they must nonetheless be renamed to avoid redeclaration. + * Alternative solution: do not redeclare them. + * However, this requires some #ifdefs, and has a more dispersed impact. + * Meanwhile, renaming can be achieved in a single place. 
+ */ +# define XXH_IPREF(Id) XXH_NAMESPACE ## Id +# define XXH_OK XXH_IPREF(XXH_OK) +# define XXH_ERROR XXH_IPREF(XXH_ERROR) +# define XXH_errorcode XXH_IPREF(XXH_errorcode) +# define XXH32_canonical_t XXH_IPREF(XXH32_canonical_t) +# define XXH64_canonical_t XXH_IPREF(XXH64_canonical_t) +# define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t) +# define XXH32_state_s XXH_IPREF(XXH32_state_s) +# define XXH32_state_t XXH_IPREF(XXH32_state_t) +# define XXH64_state_s XXH_IPREF(XXH64_state_s) +# define XXH64_state_t XXH_IPREF(XXH64_state_t) +# define XXH3_state_s XXH_IPREF(XXH3_state_s) +# define XXH3_state_t XXH_IPREF(XXH3_state_t) +# define XXH128_hash_t XXH_IPREF(XXH128_hash_t) + /* Ensure the header is parsed again, even if it was previously included */ +# undef XXHASH_H_5627135585666179 +# undef XXHASH_H_STATIC_13879238742 +#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */ + +/* **************************************************************** + * Stable API + *****************************************************************/ +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + +/*! @brief Marks a global symbol. */ +#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API) +# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT)) +# ifdef XXH_EXPORT +# define XXH_PUBLIC_API __declspec(dllexport) +# elif XXH_IMPORT +# define XXH_PUBLIC_API __declspec(dllimport) +# endif +# else +# define XXH_PUBLIC_API /* do nothing */ +# endif +#endif + +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +/* XXH32 */ +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +/* XXH64 */ +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +/* XXH3_64bits */ +# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits) +# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret) +# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed) +# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed) +# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState) +# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState) +# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState) +# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset) +# define 
XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
+# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
+# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
+# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
+# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
+# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
+# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
+/* XXH3_128bits */
+# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
+# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
+# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
+# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
+# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
+# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
+# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
+# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
+# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
+# define XXH128_cmp XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
+# define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
+# define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
+#endif
+
+
+/* *************************************
+* Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+# ifdef XXH_EXPORT
+# define XXH_PUBLIC_API __declspec(dllexport)
+# elif XXH_IMPORT
+# define XXH_PUBLIC_API __declspec(dllimport)
+# endif
+# else
+# define XXH_PUBLIC_API /* do nothing */
+# endif
+#endif
+
+#if defined (__GNUC__)
+# define XXH_CONSTF __attribute__((const))
+# define XXH_PUREF __attribute__((pure))
+# define XXH_MALLOCF __attribute__((malloc))
+#else
+# define XXH_CONSTF /* disable */
+# define XXH_PUREF
+# define XXH_MALLOCF
+#endif
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 8
+#define XXH_VERSION_RELEASE 2
+/*! @brief Version number, encoded as two digits each */
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+
+/*!
+ * @brief Obtains the xxHash version.
+ *
+ * This is mostly useful when xxHash is compiled as a shared library,
+ * since the returned value comes from the library, as opposed to the header file.
+ *
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
+ */
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
+
+
+/* ****************************
+* Common basic types
+******************************/
+#include <stddef.h>   /* size_t */
+/*!
+ * @brief Exit code for the streaming API.
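+ *
+ * Most streaming calls report failure through this type. A typical check, as
+ * an illustrative sketch (`state`, `buf` and `bufSize` are hypothetical
+ * caller-side names):
+ * @code{.c}
+ * if (XXH32_update(state, buf, bufSize) != XXH_OK) {
+ *     // handle the failure
+ * }
+ * @endcode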
+ */
+typedef enum {
+    XXH_OK = 0, /*!< OK */
+    XXH_ERROR   /*!< Error */
+} XXH_errorcode;
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
+/*!
+ * @brief An unsigned 32-bit integer.
+ *
+ * Not necessarily defined to `uint32_t` but functionally equivalent.
+ */
+typedef uint32_t XXH32_hash_t;
+
+#elif !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#  ifdef _AIX
+#    include <inttypes.h>
+#  else
+#    include <stdint.h>
+#  endif
+   typedef uint32_t XXH32_hash_t;
+
+#else
+#  include <limits.h>
+#  if UINT_MAX == 0xFFFFFFFFUL
+     typedef unsigned int XXH32_hash_t;
+#  elif ULONG_MAX == 0xFFFFFFFFUL
+     typedef unsigned long XXH32_hash_t;
+#  else
+#    error "unsupported platform: need a 32-bit type"
+#  endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup XXH32_family XXH32 family
+ * @ingroup public
+ * Contains functions used in the classic 32-bit xxHash algorithm.
+ *
+ * @note
+ *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
+ *   Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ *   and 64-bit systems, and offers true 64/128 bit hash results.
+ *
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
+ * @{
+ */
+
+/*!
+ * @brief Calculates the 32-bit hash of @p input using xxHash32.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 32-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 32-bit xxHash32 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
+
+#ifndef XXH_NO_STREAM
+/*!
+ * @typedef struct XXH32_state_s XXH32_state_t
+ * @brief The opaque state struct for the XXH32 streaming API.
+ *
+ * @see XXH32_state_s for details.
+ */
+typedef struct XXH32_state_s XXH32_state_t;
+
+/*!
+ * @brief Allocates an @ref XXH32_state_t.
+ *
+ * @return An allocated pointer of @ref XXH32_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH32_freeState().
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
+/*!
+ * @brief Frees an @ref XXH32_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH32_createState().
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH32_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH32_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 32-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH32_update().
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, XXH32_hash_t seed);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH32_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH32_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return The calculated 32-bit xxHash32 value from that state.
+ *
+ * @note
+ *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/******* Canonical representation *******/
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
+ */
+typedef struct {
+    unsigned char digest[4]; /*!< Hash bytes, big endian */
+} XXH32_canonical_t;
+
+/*!
+ * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
+ *
+ * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH32_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
+ *
+ * @param src The @ref XXH32_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+
+/*! @cond Doxygen ignores this part */
+#ifdef __has_attribute
+# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define XXH_HAS_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * C23 __STDC_VERSION__ number hasn't been specified yet. For now
+ * leave as `201711L` (C17 + 1).
+ * TODO: Update to correct value when it's been specified.
+ */
+#define XXH_C23_VN 201711L
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/* C-language Attributes are added in C23. */
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
+# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
+#else
+# define XXH_HAS_C_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*!
@cond Doxygen ignores this part */
+#if defined(__cplusplus) && defined(__has_cpp_attribute)
+# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define XXH_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in C++17 and C23.
+ * C++17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
+# define XXH_FALLTHROUGH [[fallthrough]]
+#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
+#else
+# define XXH_FALLTHROUGH /* fallthrough */
+#endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_NOESCAPE for annotated pointers in the public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+# define XXH_NOESCAPE __attribute__((noescape))
+#else
+# define XXH_NOESCAPE
+#endif
+/*! @endcond */
+
+
+/*!
+ * @}
+ * @ingroup public
+ * @{
+ */
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
+/*!
+ * @brief An unsigned 64-bit integer.
+ *
+ * Not necessarily defined to `uint64_t` but functionally equivalent.
+ */
+typedef uint64_t XXH64_hash_t;
+#elif !defined (__VMS) \
+  && (defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#  ifdef _AIX
+#    include <inttypes.h>
+#  else
+#    include <stdint.h>
+#  endif
+   typedef uint64_t XXH64_hash_t;
+#else
+#  include <limits.h>
+#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
+     /* LP64 ABI says uint64_t is unsigned long */
+     typedef unsigned long XXH64_hash_t;
+#  else
+     /* the following type must have a width of 64-bit */
+     typedef unsigned long long XXH64_hash_t;
+#  endif
+#endif
+
+/*!
+ * @}
+ *
+ * @defgroup XXH64_family XXH64 family
+ * @ingroup public
+ * @{
+ * Contains functions used in the classic 64-bit xxHash algorithm.
+ *
+ * @note
+ *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,
+ *   and offers true 64/128 bit hash results.
+ *   It provides better speed for systems with vector processing capabilities.
+ */
+
+/*!
+ * @brief Calculates the 64-bit hash of @p input using xxHash64.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit xxHash64 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
+
+/******* Streaming *******/
+#ifndef XXH_NO_STREAM
+/*!
+ * @brief The opaque state struct for the XXH64 streaming API.
+ *
+ * @see XXH64_state_s for details.
+ */
+typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
+
+/*!
+ * @brief Allocates an @ref XXH64_state_t. + * + * @return An allocated pointer of @ref XXH64_state_t on success. + * @return `NULL` on failure. + * + * @note Must be freed with XXH64_freeState(). + */ +XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void); + +/*! + * @brief Frees an @ref XXH64_state_t. + * + * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState(). + * + * @return @ref XXH_OK. + * + * @note @p statePtr must be allocated with XXH64_createState(). + */ +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + +/*! + * @brief Copies one @ref XXH64_state_t to another. + * + * @param dst_state The state to copy to. + * @param src_state The state to copy from. + * @pre + * @p dst_state and @p src_state must not be `NULL` and must not overlap. + */ +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state); + +/*! + * @brief Resets an @ref XXH64_state_t to begin a new hash. + * + * @param statePtr The state struct to reset. + * @param seed The 64-bit seed to alter the hash result predictably. + * + * @pre + * @p statePtr must not be `NULL`. + * + * @return @ref XXH_OK on success. + * @return @ref XXH_ERROR on failure. + * + * @note This function resets and seeds a state. Call it before @ref XXH64_update(). + */ +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed); + +/*! + * @brief Consumes a block of @p input to an @ref XXH64_state_t. + * + * @param statePtr The state struct to update. + * @param input The block of data to be hashed, at least @p length bytes in size. + * @param length The length of @p input, in bytes. + * + * @pre + * @p statePtr must not be `NULL`. + * @pre + * The memory between @p input and @p input + @p length must be valid, + * readable, contiguous memory. However, if @p length is `0`, @p input may be + * `NULL`. In C++, this also must be *TriviallyCopyable*. + * + * @return @ref XXH_OK on success. + * @return @ref XXH_ERROR on failure. + * + * @note Call this to incrementally consume blocks of data. + */ +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length); + +/*! + * @brief Returns the calculated hash value from an @ref XXH64_state_t. + * + * @param statePtr The state struct to calculate the hash from. + * + * @pre + * @p statePtr must not be `NULL`. + * + * @return The calculated 64-bit xxHash64 value from that state. + * + * @note + * Calling XXH64_digest() will not affect @p statePtr, so you can update, + * digest, and update again. + */ +XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr); +#endif /* !XXH_NO_STREAM */ +/******* Canonical representation *******/ + +/*! + * @brief Canonical (big endian) representation of @ref XXH64_hash_t. + */ +typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t; + +/*! + * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t. + * + * @param dst The @ref XXH64_canonical_t pointer to be stored to. + * @param hash The @ref XXH64_hash_t to be converted. + * + * @pre + * @p dst must not be `NULL`. + * + * @see @ref canonical_representation_example "Canonical Representation Example" + */ +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash); + +/*! + * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t. 
+ *
+ * @param src The @ref XXH64_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
+
+#ifndef XXH_NO_XXH3
+
+/*!
+ * @}
+ * ************************************************************************
+ * @defgroup XXH3_family XXH3 family
+ * @ingroup public
+ * @{
+ *
+ * XXH3 is a more recent hash algorithm featuring:
+ *  - Improved speed for both small and large inputs
+ *  - True 64-bit and 128-bit outputs
+ *  - SIMD acceleration
+ *  - Improved 32-bit viability
+ *
+ * Speed analysis methodology is explained here:
+ *
+ *    https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
+ *
+ * Compared to XXH64, expect XXH3 to run approximately 2x faster on large
+ * inputs and >3x faster on small ones; exact differences depend on the platform.
+ *
+ * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
+ * but it does not require them.
+ * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
+ * at competitive speeds, even without vector support. Further details are
+ * explained in the implementation.
+ *
+ * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
+ * implementations for many common platforms:
+ *  - AVX512
+ *  - AVX2
+ *  - SSE2
+ *  - ARM NEON
+ *  - WebAssembly SIMD128
+ *  - POWER8 VSX
+ *  - s390x ZVector
+ * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
+ * selects the best version according to predefined macros. For the x86 family, an
+ * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
+ *
+ * The XXH3 implementation is portable:
+ * it has a generic C90 formulation that can be compiled on any platform,
+ * and all implementations generate exactly the same hash value on all platforms.
+ * Starting from v0.8.0, it's also labelled "stable", meaning that
+ * any future version will also generate the same hash value.
+ *
+ * XXH3 offers 2 variants, _64bits and _128bits.
+ *
+ * When only 64 bits are needed, prefer invoking the _64bits variant, as it
+ * reduces the amount of mixing, resulting in faster speed on small inputs.
+ * It's also generally simpler to manipulate a scalar return type than a struct.
+ *
+ * The API supports one-shot hashing, streaming mode, and custom secrets.
+ */
+/*-**********************************************************************
+* XXH3 64-bit variant
+************************************************************************/
+
+/*!
+ * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ *   This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however
+ *   it may have slightly better performance due to constant propagation of the
+ *   defaults.
+ *
+ * @see
+ *   XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ *   seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * This variant generates a custom secret on the fly based on the default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
+
+/*!
+ * The bare minimum size for a custom secret.
+ *
+ * @see
+ *   XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
+ *   XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
+ */
+#define XXH3_SECRET_SIZE_MIN 136
+
+/*!
+ * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @pre
+ *   The memory between @p data and @p data + @p len must be valid,
+ *   readable, contiguous memory. However, if @p len is `0`, @p data may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+
+/******* Streaming *******/
+#ifndef XXH_NO_STREAM
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
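+ *
+ * A complete XXH3 streaming session, as an illustrative sketch (`part1`,
+ * `part2` and their sizes are hypothetical inputs):
+ * @code{.c}
+ * XXH3_state_t* const st = XXH3_createState();
+ * if (st != NULL) {
+ *     XXH3_64bits_reset(st);                      // unseeded session
+ *     XXH3_64bits_update(st, part1, part1Size);   // feed data in any number
+ *     XXH3_64bits_update(st, part2, part2Size);   // of pieces
+ *     XXH64_hash_t const h = XXH3_64bits_digest(st);
+ *     (void)h;
+ *     XXH3_freeState(st);
+ * }
+ * @endcode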
+ */
+
+/*!
+ * @brief The opaque state struct for the XXH3 streaming API.
+ *
+ * @see XXH3_state_s for details.
+ */
+typedef struct XXH3_state_s XXH3_state_t;
+XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
+
+/*!
+ * @brief Copies one @ref XXH3_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret with default parameters.
+ *   - Call this function before @ref XXH3_64bits_update().
+ *   - Digest will be equivalent to `XXH3_64bits()`.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret from `seed`.
+ *   - Call this function before @ref XXH3_64bits_update().
+ *   - Digest will be equivalent to `XXH3_64bits_withSeed()`.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   `secret` is referenced, not copied: it _must outlive_ the hash streaming session.
+ *
+ * Similar to the one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on the secret's entropy
+ * (the secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 64-bit hash value from that state.
+ *
+ * @note
+ *   Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/* note : canonical representation of XXH3 is the same as XXH64
+ * since they both produce XXH64_hash_t values */
+
+
+/*-**********************************************************************
+* XXH3 128-bit variant
+************************************************************************/
+
+/*!
+ * @brief The return value from 128-bit hashes.
+ *
+ * Stored in little endian order, although the fields themselves are in native
+ * endianness.
+ */
+typedef struct {
+    XXH64_hash_t low64;  /*!< `value & 0xFFFFFFFFFFFFFFFF` */
+    XXH64_hash_t high64; /*!< `value >> 64` */
+} XXH128_hash_t;
+
+/*!
+ * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
+ * for shorter inputs.
+ *
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * @note
+ *   seed == 0 produces the same results as @ref XXH3_128bits().
+ *
+ * This variant generates a custom secret on the fly based on the default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+/*!
+ * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/******* Streaming *******/
+#ifndef XXH_NO_STREAM
+/*
+ * Streaming requires state maintenance.
+ * This operation costs memory and CPU.
+ * As a consequence, streaming is slower than one-shot hashing.
+ * For better performance, prefer one-shot functions whenever applicable.
+ *
+ * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
+ * Use the already declared XXH3_createState() and XXH3_freeState().
+ *
+ * All reset and streaming functions have the same meaning as their 64-bit counterparts.
+ */
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret with default parameters.
+ *   - Call it before @ref XXH3_128bits_update().
+ *   - Digest will be equivalent to `XXH3_128bits()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generates a secret from `seed`.
+ *   - Call it before @ref XXH3_128bits_update().
+ *   - Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * `secret` is referenced, not copied: it _must outlive_ the hash streaming session.
+ * Similar to the one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on the secret's entropy
+ * (the secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 128-bit hash value from that state.
+ *
+ * @note
+ *   Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ *
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
+
+/* The following helper functions make it possible to compare XXH128_hash_t values.
+ * Since XXH128_hash_t is a structure, this capability is not offered by the language.
+ * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
+
+/*!
+ * @brief Check equality of two XXH128_hash_t values
+ *
+ * @param h1 The 128-bit hash value.
+ * @param h2 Another 128-bit hash value.
+ *
+ * @return `1` if `h1` and `h2` are equal.
+ * @return `0` if they are not.
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+
+/*!
+ * @brief Compares two @ref XXH128_hash_t
+ *
+ * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
+ *
+ * @param h128_1 Left-hand side value
+ * @param h128_2 Right-hand side value
+ *
+ * @return >0 if @p h128_1  > @p h128_2
+ * @return =0 if @p h128_1 == @p h128_2
+ * @return <0 if @p h128_1  < @p h128_2
+ */
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
+
+
+/******* Canonical representation *******/
+typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
+
+
+/*!
+ * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
+ *
+ * @param dst The @ref XXH128_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH128_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
+ *
+ * @param src The @ref XXH128_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
+
+
+#endif /* !XXH_NO_XXH3 */
+#endif /* XXH_NO_LONG_LONG */
+
+/*!
+ * @}
+ */
+#endif /* XXHASH_H_5627135585666179 */
+
+
+
+#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
+#define XXHASH_H_STATIC_13879238742
+/* ****************************************************************************
+ * This section contains declarations which are not guaranteed to remain stable.
+ * They may change in future versions, becoming incompatible with a different
+ * version of the library.
+ * These declarations should only be used with static linking.
+ * Never use them in association with dynamic linking!
+ ***************************************************************************** */
+
+/*
+ * These definitions are only present to allow static allocation
+ * of XXH states, on stack or in a struct, for example.
+ * Never **ever** access their members directly.
+ */
+
+/*!
+ * @internal
+ * @brief Structure for XXH32 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH32_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH64_state_s, XXH3_state_s
+ */
+struct XXH32_state_s {
+   XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
+   XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
+   XXH32_hash_t v[4];         /*!< Accumulator lanes */
+   XXH32_hash_t mem32[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
+   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem32 */
+   XXH32_hash_t reserved;     /*!< Reserved field. Do not read from or write to it. */
+};   /* typedef'd to XXH32_state_t */
+
+
+#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
+
+/*!
+ * @internal
+ * @brief Structure for XXH64 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
+ * an opaque type. This allows fields to safely be changed.
+ *
+ * Typedef'd to @ref XXH64_state_t.
+ * Do not access the members of this struct directly.
+ * @see XXH32_state_s, XXH3_state_s
+ */
+struct XXH64_state_s {
+   XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
+   XXH64_hash_t v[4];         /*!< Accumulator lanes */
+   XXH64_hash_t mem64[4];     /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
+   XXH32_hash_t memsize;      /*!< Amount of data in @ref mem64 */
+   XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyway */
+   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read from or write to it.
*/
+};  /* typedef'd to XXH64_state_t */
+
+#ifndef XXH_NO_XXH3
+
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
+#  include <stdalign.h>
+#  define XXH_ALIGN(n)      alignas(n)
+#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
+/* In C++ alignas() is a keyword */
+#  define XXH_ALIGN(n)      alignas(n)
+#elif defined(__GNUC__)
+#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER)
+#  define XXH_ALIGN(n)      __declspec(align(n))
+#else
+#  define XXH_ALIGN(n)   /* disabled */
+#endif
+
+/* Old GCC versions only accept the attribute after the type in structures. */
+#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
+    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
+    && defined(__GNUC__)
+#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
+#else
+#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
+#endif
+
+/*!
+ * @brief The size of the internal XXH3 buffer.
+ *
+ * This is the optimal update size for incremental hashing.
+ *
+ * @see XXH3_64bits_update(), XXH3_128bits_update().
+ */
+#define XXH3_INTERNALBUFFER_SIZE 256
+
+/*!
+ * @internal
+ * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
+ *
+ * This is the size used in @ref XXH3_kSecret and the seeded functions.
+ *
+ * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
+ */
+#define XXH3_SECRET_DEFAULT_SIZE 192
+
+/*!
+ * @internal
+ * @brief Structure for XXH3 streaming API.
+ *
+ * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
+ * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
+ * Otherwise it is an opaque type.
+ * Never use this definition in combination with a dynamic library.
+ * This allows fields to safely be changed in the future.
+ *
+ * @note ** This structure has a strict alignment requirement of 64 bytes!! **
+ * Do not allocate this with `malloc()` or `new`,
+ * it will not be sufficiently aligned.
+ * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
+ *
+ * Typedef'd to @ref XXH3_state_t.
+ * Never access the members of this struct directly.
+ *
+ * @see XXH3_INITSTATE() for stack initialization.
+ * @see XXH3_createState(), XXH3_freeState().
+ * @see XXH32_state_s, XXH64_state_s
+ */
+struct XXH3_state_s {
+   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
+       /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
+   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
+       /*!< Used to store a custom secret generated from a seed. */
+   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
+       /*!< The internal buffer. @see XXH32_state_s::mem32 */
+   XXH32_hash_t bufferedSize;
+       /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
+   XXH32_hash_t useSeed;
+       /*!< Reserved field. Needed for padding on 64-bit. */
+   size_t nbStripesSoFar;
+       /*!< Number of stripes processed. */
+   XXH64_hash_t totalLen;
+       /*!< Total length hashed. 64-bit even on 32-bit targets. */
+   size_t nbStripesPerBlock;
+       /*!< Number of stripes per block. */
+   size_t secretLimit;
+       /*!< Size of @ref customSecret or @ref extSecret */
+   XXH64_hash_t seed;
+       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
+   XXH64_hash_t reserved64;
+       /*!< Reserved field. */
+   const unsigned char* extSecret;
+       /*!< Reference to an external secret for the _withSecret variants, NULL
+        *   for other variants.
*/
+   /* note: there may be some padding at the end due to alignment on 64 bytes */
+}; /* typedef'd to XXH3_state_t */
+
+#undef XXH_ALIGN_MEMBER
+
+/*!
+ * @brief Initializes a stack-allocated `XXH3_state_s`.
+ *
+ * When the @ref XXH3_state_t structure is merely allocated on the stack,
+ * it should be initialized with XXH3_INITSTATE() or a memset()
+ * in case its first reset uses XXH3_NNbits_reset_withSeed().
+ * This init can be omitted if the first reset uses default or _withSecret mode.
+ * This operation isn't necessary when the state is created with XXH3_createState().
+ * Note that this doesn't prepare the state for a streaming operation,
+ * it's still necessary to use XXH3_NNbits_reset*() afterwards.
+ */
+#define XXH3_INITSTATE(XXH3_state_ptr)                       \
+    do {                                                     \
+        XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
+        tmp_xxh3_state_ptr->seed = 0;                        \
+        tmp_xxh3_state_ptr->extSecret = NULL;                \
+    } while(0)
+
+
+/*!
+ * @brief Calculates the 128-bit hash of @p data using XXH3.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p data and @p data + @p len must be valid,
+ *   readable, contiguous memory. However, if @p len is `0`, @p data may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+
+
+/* === Experimental API === */
+/* Symbols defined below must be considered tied to a specific library version. */
+
+/*!
+ * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
+ *
+ * @param secretBuffer A writable buffer for derived high-entropy secret data.
+ * @param secretSize Size of secretBuffer, in bytes. Must be >= XXH3_SECRET_SIZE_MIN.
+ * @param customSeed A user-defined content.
+ * @param customSeedSize Size of customSeed, in bytes.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * The generated secret can be used in combination with `*_withSecret()` functions.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than a 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
+ *
+ * The function accepts as input a custom seed of any length and any content,
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
+ *
+ * The generated secret can then be used with any `*_withSecret()` variant.
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
+ * are part of this list. They all accept a `secret` parameter
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
+ * _and_ feature very high entropy (consist of random-looking bytes).
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
+ *
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes.
The resulting `secret` will nonetheless provide all required qualities.
+ *
+ * @pre
+ *   - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ *   - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ *
+ * Example code:
+ * @code{.c}
+ * #include <stdio.h>
+ * #include <string.h>
+ * #include <stdlib.h>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Hashes argv[2] using the entropy from argv[1].
+ * int main(int argc, char* argv[])
+ * {
+ *     char secret[XXH3_SECRET_SIZE_MIN];
+ *     if (argc != 3) { return 1; }
+ *     XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ *     XXH64_hash_t h = XXH3_64bits_withSecret(
+ *          argv[2], strlen(argv[2]),
+ *          secret, sizeof(secret)
+ *     );
+ *     printf("%016llx\n", (unsigned long long) h);
+ * }
+ * @endcode
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
+
+/*!
+ * @brief Generate the same secret as the _withSeed() variants.
+ *
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * The generated secret can be used in combination with
+ * `*_withSecret()` and `_withSecretandSeed()` variants.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ * #include <string>
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ * #include "xxhash.h"
+ * // Slow, seeds each time
+ * class HashSlow {
+ *     XXH64_hash_t seed;
+ * public:
+ *     HashSlow(XXH64_hash_t s) : seed{s} {}
+ *     size_t operator()(const std::string& x) const {
+ *         return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ *     }
+ * };
+ * // Fast, caches the seeded secret for future uses.
+ * class HashFast {
+ *     unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
+ * public:
+ *     HashFast(XXH64_hash_t s) {
+ *         XXH3_generateSecret_fromSeed(secret, s);
+ *     }
+ *     size_t operator()(const std::string& x) const {
+ *         return size_t{
+ *             XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ *         };
+ *     }
+ * };
+ * @endcode
+ */
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
+
+/*!
+ * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len The length of @p data, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * These variants generate hash values using either
+ * @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
+ *
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
+ * It's fast, but the cost can be perceptible for "not so large" keys (< 1 KB).
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
+ * which requires more instructions than the _withSeed() variants.
+ * Therefore, the _withSecretandSeed() variant combines the best of both worlds.
+ *
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
+ * this variant produces *exactly* the same results as the `_withSeed()` variant,
+ * hence offering only a pure speed benefit on "large" input,
+ * by skipping the need to regenerate the secret for every large input.
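+ *
+ * An illustrative sketch of that pattern (`data` and `len` stand for
+ * hypothetical caller-side input):
+ * @code{.c}
+ * unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
+ * XXH64_hash_t const seed = 42;   // any seed value
+ * XXH3_generateSecret_fromSeed(secret, seed);
+ * // Same result as XXH3_64bits_withSeed(data, len, seed), but skips
+ * // regenerating the secret for each large input:
+ * XXH64_hash_t const h = XXH3_64bits_withSecretandSeed(data, len,
+ *                            secret, sizeof(secret), seed);
+ * @endcode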
+ *
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
+ * for example with XXH3_64bits(), which then becomes the seed,
+ * and then employ both the seed and the secret in _withSecretandSeed().
+ * On top of speed, an added benefit is that each bit in the secret
+ * has a 50% chance to flip each bit of the output, via its impact on the seed.
+ *
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
+ * because only portions of the secret are employed for small data.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
+                              XXH_NOESCAPE const void* secret, size_t secretSize,
+                              XXH64_hash_t seed);
+/*!
+ * @brief Calculates the 128-bit seeded variant of XXH3 hash of @p input.
+ *
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
+                               XXH_NOESCAPE const void* secret, size_t secretSize,
+                               XXH64_hash_t seed64);
+#ifndef XXH_NO_STREAM
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                    XXH_NOESCAPE const void* secret, size_t secretSize,
+                                    XXH64_hash_t seed64);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64 The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                     XXH_NOESCAPE const void* secret, size_t secretSize,
+                                     XXH64_hash_t seed64);
+#endif /* !XXH_NO_STREAM */
+
+#endif  /* !XXH_NO_XXH3 */
+#endif  /* XXH_NO_LONG_LONG */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+#  define XXH_IMPLEMENTATION
+#endif
+
+#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
+
+
+/* ======================================================================== */
+/* ======================================================================== */
+/* ======================================================================== */
+
+
+/*-**********************************************************************
+ * xxHash implementation
+ *-**********************************************************************
+ * xxHash's implementation used to be hosted inside xxhash.c.
+ * + * However, inlining requires implementation to be visible to the compiler, + * hence be included alongside the header. + * Previously, implementation was hosted inside xxhash.c, + * which was then #included when inlining was activated. + * This construction created issues with a few build and install systems, + * as it required xxhash.c to be stored in /include directory. + * + * xxHash implementation is now directly integrated within xxhash.h. + * As a consequence, xxhash.c is no longer needed in /include. + * + * xxhash.c is still available and is still useful. + * In a "normal" setup, when xxhash is not inlined, + * xxhash.h only exposes the prototypes and public symbols, + * while xxhash.c can be built into an object file xxhash.o + * which can then be linked into the final binary. + ************************************************************************/ + +#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \ + || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387) +# define XXH_IMPLEM_13a8737387 + +/* ************************************* +* Tuning parameters +***************************************/ + +/*! + * @defgroup tuning Tuning parameters + * @{ + * + * Various macros to control xxHash's behavior. + */ +#ifdef XXH_DOXYGEN +/*! + * @brief Define this to disable 64-bit code. + * + * Useful if only using the @ref XXH32_family and you have a strict C90 compiler. + */ +# define XXH_NO_LONG_LONG +# undef XXH_NO_LONG_LONG /* don't actually */ +/*! + * @brief Controls how unaligned memory is accessed. + * + * By default, access to unaligned memory is controlled by `memcpy()`, which is + * safe and portable. + * + * Unfortunately, on some target/compiler combinations, the generated assembly + * is sub-optimal. + * + * The below switch allow selection of a different access method + * in the search for improved performance. + * + * @par Possible options: + * + * - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy` + * @par + * Use `memcpy()`. Safe and portable. Note that most modern compilers will + * eliminate the function call and treat it as an unaligned access. + * + * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))` + * @par + * Depends on compiler extensions and is therefore not portable. + * This method is safe _if_ your compiler supports it, + * and *generally* as fast or faster than `memcpy`. + * + * - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast + * @par + * Casts directly and dereferences. This method doesn't depend on the + * compiler, but it violates the C standard as it directly dereferences an + * unaligned pointer. It can generate buggy code on targets which do not + * support unaligned memory accesses, but in some circumstances, it's the + * only known way to get the most performance. + * + * - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift + * @par + * Also portable. This can generate the best code on old compilers which don't + * inline small `memcpy()` calls, and it might also be faster on big-endian + * systems which lack a native byteswap instruction. However, some compilers + * will emit literal byteshifts even if the target supports unaligned access. + * + * + * @warning + * Methods 1 and 2 rely on implementation-defined behavior. Use these with + * care, as what works on one compiler/platform/optimization level may cause + * another to read garbage data or even crash. + * + * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details. 
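+ *
+ * For example, a build that wants to force the byteshift method could pass
+ * `-DXXH_FORCE_MEMORY_ACCESS=3` on the compiler command line (illustrative
+ * invocation; the macro can be set the same way to any of the values above).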
+ * + * Prefer these methods in priority order (0 > 3 > 1 > 2) + */ +# define XXH_FORCE_MEMORY_ACCESS 0 + +/*! + * @def XXH_SIZE_OPT + * @brief Controls how much xxHash optimizes for size. + * + * xxHash, when compiled, tends to result in a rather large binary size. This + * is mostly due to heavy usage to forced inlining and constant folding of the + * @ref XXH3_family to increase performance. + * + * However, some developers prefer size over speed. This option can + * significantly reduce the size of the generated code. When using the `-Os` + * or `-Oz` options on GCC or Clang, this is defined to 1 by default, + * otherwise it is defined to 0. + * + * Most of these size optimizations can be controlled manually. + * + * This is a number from 0-2. + * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed + * comes first. + * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more + * conservative and disables hacks that increase code size. It implies the + * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0, + * and @ref XXH3_NEON_LANES == 8 if they are not already defined. + * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible. + * Performance may cry. For example, the single shot functions just use the + * streaming API. + */ +# define XXH_SIZE_OPT 0 + +/*! + * @def XXH_FORCE_ALIGN_CHECK + * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32() + * and XXH64() only). + * + * This is an important performance trick for architectures without decent + * unaligned memory access performance. + * + * It checks for input alignment, and when conditions are met, uses a "fast + * path" employing direct 32-bit/64-bit reads, resulting in _dramatically + * faster_ read speed. + * + * The check costs one initial branch per hash, which is generally negligible, + * but not zero. + * + * Moreover, it's not useful to generate an additional code path if memory + * access uses the same instruction for both aligned and unaligned + * addresses (e.g. x86 and aarch64). + * + * In these cases, the alignment check can be removed by setting this macro to 0. + * Then the code will always use unaligned memory access. + * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips + * which are platforms known to offer good unaligned memory accesses performance. + * + * It is also disabled by default when @ref XXH_SIZE_OPT >= 1. + * + * This option does not affect XXH3 (only XXH32 and XXH64). + */ +# define XXH_FORCE_ALIGN_CHECK 0 + +/*! + * @def XXH_NO_INLINE_HINTS + * @brief When non-zero, sets all functions to `static`. + * + * By default, xxHash tries to force the compiler to inline almost all internal + * functions. + * + * This can usually improve performance due to reduced jumping and improved + * constant folding, but significantly increases the size of the binary which + * might not be favorable. + * + * Additionally, sometimes the forced inlining can be detrimental to performance, + * depending on the architecture. + * + * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the + * compiler full control on whether to inline or not. + * + * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if + * @ref XXH_SIZE_OPT >= 1, this will automatically be defined. + */ +# define XXH_NO_INLINE_HINTS 0 + +/*! + * @def XXH3_INLINE_SECRET + * @brief Determines whether to inline the XXH3 withSecret code. 
+ * + * When the secret size is known, the compiler can improve the performance + * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret(). + * + * However, if the secret size is not known, it doesn't have any benefit. This + * happens when xxHash is compiled into a global symbol. Therefore, if + * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0. + * + * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers + * that are *sometimes* force inline on -Og, and it is impossible to automatically + * detect this optimization level. + */ +# define XXH3_INLINE_SECRET 0 + +/*! + * @def XXH32_ENDJMP + * @brief Whether to use a jump for `XXH32_finalize`. + * + * For performance, `XXH32_finalize` uses multiple branches in the finalizer. + * This is generally preferable for performance, + * but depending on exact architecture, a jmp may be preferable. + * + * This setting is only possibly making a difference for very small inputs. + */ +# define XXH32_ENDJMP 0 + +/*! + * @internal + * @brief Redefines old internal names. + * + * For compatibility with code that uses xxHash's internals before the names + * were changed to improve namespacing. There is no other reason to use this. + */ +# define XXH_OLD_NAMES +# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */ + +/*! + * @def XXH_NO_STREAM + * @brief Disables the streaming API. + * + * When xxHash is not inlined and the streaming functions are not used, disabling + * the streaming functions can improve code size significantly, especially with + * the @ref XXH3_family which tends to make constant folded copies of itself. + */ +# define XXH_NO_STREAM +# undef XXH_NO_STREAM /* don't actually */ +#endif /* XXH_DOXYGEN */ +/*! + * @} + */ + +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ + /* prefer __packed__ structures (method 1) for GCC + * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy + * which for some reason does unaligned loads. */ +# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED)) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +#ifndef XXH_SIZE_OPT + /* default to 1 for -Os or -Oz */ +# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__) +# define XXH_SIZE_OPT 1 +# else +# define XXH_SIZE_OPT 0 +# endif +#endif + +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ + /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */ +# if XXH_SIZE_OPT >= 1 || \ + defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \ + || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */ +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + +#ifndef XXH_NO_INLINE_HINTS +# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */ +# define XXH_NO_INLINE_HINTS 1 +# else +# define XXH_NO_INLINE_HINTS 0 +# endif +#endif + +#ifndef XXH3_INLINE_SECRET +# if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \ + || !defined(XXH_INLINE_ALL) +# define XXH3_INLINE_SECRET 0 +# else +# define XXH3_INLINE_SECRET 1 +# endif +#endif + +#ifndef XXH32_ENDJMP +/* generally preferable for performance */ +# define XXH32_ENDJMP 0 +#endif + +/*! 
+ * @defgroup impl Implementation
+ * @{
+ */
+
+
+/* *************************************
+*  Includes & Memory related functions
+***************************************/
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoke malloc / free.
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
+static void XXH_free(void* p) { (void)p; }
+
+#else
+
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free().
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
+static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
+static void XXH_free(void* p) { free(p); }
+
+#endif /* XXH_NO_STDLIB */
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    return memcpy(dest,src,size);
+}
+
+#include <limits.h>   /* ULLONG_MAX */
+
+
+/* *************************************
+*  Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio warning fix */
+#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+#endif
+
+#if XXH_NO_INLINE_HINTS /* disable inlining hints */
+#  if defined(__GNUC__) || defined(__clang__)
+#    define XXH_FORCE_INLINE static __attribute__((unused))
+#  else
+#    define XXH_FORCE_INLINE static
+#  endif
+#  define XXH_NO_INLINE static
+/* enable inlining hints */
+#elif defined(__GNUC__) || defined(__clang__)
+#  define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
+#  define XXH_NO_INLINE static __attribute__((noinline))
+#elif defined(_MSC_VER) /* Visual Studio */
+#  define XXH_FORCE_INLINE static __forceinline
+#  define XXH_NO_INLINE static __declspec(noinline)
+#elif defined (__cplusplus) \
+  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
+#  define XXH_FORCE_INLINE static inline
+#  define XXH_NO_INLINE static
+#else
+#  define XXH_FORCE_INLINE static
+#  define XXH_NO_INLINE static
+#endif
+
+#if XXH3_INLINE_SECRET
+#  define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
+#else
+#  define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
+#endif
+
+
+/* *************************************
+*  Debug
+***************************************/
+/*!
+ * @ingroup tuning
+ * @def XXH_DEBUGLEVEL
+ * @brief Sets the debugging level.
+ *
+ * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
+ * compiler's command line options. The value must be a number.
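+ *
+ * For example, building with `-DXXH_DEBUGLEVEL=1` (illustrative invocation)
+ * turns the XXH_ASSERT() checks below into real assert() calls.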
+ */
+#ifndef XXH_DEBUGLEVEL
+#  ifdef DEBUGLEVEL /* backwards compat */
+#    define XXH_DEBUGLEVEL DEBUGLEVEL
+#  else
+#    define XXH_DEBUGLEVEL 0
+#  endif
+#endif
+
+#if (XXH_DEBUGLEVEL>=1)
+#  include <assert.h>   /* note: can still be disabled with NDEBUG */
+#  define XXH_ASSERT(c)   assert(c)
+#else
+#  if defined(__INTEL_COMPILER)
+#    define XXH_ASSERT(c)   XXH_ASSUME((unsigned char) (c))
+#  else
+#    define XXH_ASSERT(c)   XXH_ASSUME(c)
+#  endif
+#endif
+
+/* note: use after variable declarations */
+#ifndef XXH_STATIC_ASSERT
+#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
+#  elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+#  else
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
+#  endif
+#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
+#endif
+
+/*!
+ * @internal
+ * @def XXH_COMPILER_GUARD(var)
+ * @brief Used to prevent unwanted optimizations for @p var.
+ *
+ * It uses an empty GCC inline assembly statement with a register constraint
+ * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
+ * on x86) and marks it as modified.
+ *
+ * This is used in a few places to avoid unwanted autovectorization (e.g.
+ * XXH32_round()). All vectorization we want is explicit via intrinsics,
+ * and _usually_ isn't wanted elsewhere.
+ *
+ * We also use it to prevent unwanted constant folding for AArch64 in
+ * XXH3_initCustomSecret_scalar().
+ */
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
+#else
+#  define XXH_COMPILER_GUARD(var) ((void)0)
+#endif
+
+/* Specifically for NEON vectors which use the "w" constraint, on
+ * Clang. */
+#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
+#  define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
+#else
+#  define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
+#endif
+
+/* *************************************
+*  Basic Types
+***************************************/
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+#  ifdef _AIX
+#    include <inttypes.h>
+#  else
+#    include <stdint.h>
+#  endif
+  typedef uint8_t xxh_u8;
+#else
+  typedef unsigned char xxh_u8;
+#endif
+typedef XXH32_hash_t xxh_u32;
+
+#ifdef XXH_OLD_NAMES
+#  warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
+#  define BYTE xxh_u8
+#  define U8   xxh_u8
+#  define U32  xxh_u32
+#endif
+
+/* ***   Memory access   *** */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_read32(const void* ptr)
+ * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit native endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readLE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
+ *
+ * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
+ *
+ * @param ptr The pointer to read from.
+ * @return The 32-bit little endian integer from the bytes at @p ptr.
+ */
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_readBE32(const void* ptr)
+ * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
+ * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * + * @param ptr The pointer to read from. + * @return The 32-bit big endian integer from the bytes at @p ptr. + */ + +/*! + * @internal + * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align) + * @brief Like @ref XXH_readLE32(), but has an option for aligned reads. + * + * Affected by @ref XXH_FORCE_MEMORY_ACCESS. + * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is + * always @ref XXH_alignment::XXH_unaligned. + * + * @param ptr The pointer to read from. + * @param align Whether @p ptr is aligned. + * @pre + * If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte + * aligned. + * @return The 32-bit little endian integer from the bytes at @p ptr. + */ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE32 and XXH_readBE32. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* + * Force direct memory access. Only works on CPU which support unaligned memory + * access in hardware. + */ +static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __attribute__((aligned(1))) is supported by gcc and clang. Originally the + * documentation claimed that it only increased the alignment, but actually it + * can decrease it on gcc, clang, and icc: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, + * https://gcc.godbolt.org/z/xYez1j67Y. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; } __attribute__((packed)) unalign; +#endif +static xxh_u32 XXH_read32(const void* ptr) +{ + typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32; + return *((const xxh_unalign32*)ptr); +} + +#else + +/* + * Portable and safe solution. Generally efficient. + * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html + */ +static xxh_u32 XXH_read32(const void* memPtr) +{ + xxh_u32 val; + XXH_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* *** Endianness *** */ + +/*! + * @ingroup tuning + * @def XXH_CPU_LITTLE_ENDIAN + * @brief Whether the target is little endian. + * + * Defined to 1 if the target is little endian, or 0 if it is big endian. + * It can be defined externally, for example on the compiler command line. + * + * If it is not defined, + * a runtime check (which is usually constant folded) is used instead. + * + * @note + * This is not necessarily defined to an integer constant. + * + * @see XXH_isLittleEndian() for the runtime check. + */ +#ifndef XXH_CPU_LITTLE_ENDIAN +/* + * Try to detect endianness automatically, to avoid the nonstandard behavior + * in `XXH_isLittleEndian()` + */ +# if defined(_WIN32) /* Windows is always little endian */ \ + || defined(__LITTLE_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 1 +# elif defined(__BIG_ENDIAN__) \ + || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define XXH_CPU_LITTLE_ENDIAN 0 +# else +/*! + * @internal + * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN. + * + * Most compilers will constant fold this. + */ +static int XXH_isLittleEndian(void) +{ + /* + * Portable and well-defined behavior. + * Don't use static: it is detrimental to performance. 
+ */
+static int XXH_isLittleEndian(void)
+{
+    /*
+     * Portable and well-defined behavior.
+     * Don't use static: it is detrimental to performance.
+     */
+    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
+    return one.c[0];
+}
+#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
+#  endif
+#endif
+
+
+
+
+/* ****************************************
+*  Compiler-specific Functions and Macros
+******************************************/
+#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+#ifdef __has_builtin
+#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
+#else
+#  define XXH_HAS_BUILTIN(x) 0
+#endif
+
+
+
+/*
+ * C23 and future versions have standard "unreachable()".
+ * Once it has been implemented reliably we can add it as an
+ * additional case:
+ *
+ * ```
+ * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
+ * #  include <stddef.h>
+ * #  ifdef unreachable
+ * #    define XXH_UNREACHABLE() unreachable()
+ * #  endif
+ * #endif
+ * ```
+ *
+ * Note C++23 also has std::unreachable() which can be detected
+ * as follows:
+ * ```
+ * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
+ * #  include <utility>
+ * #  define XXH_UNREACHABLE() std::unreachable()
+ * #endif
+ * ```
+ * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
+ * We don't use that as including `<utility>` in `extern "C"` blocks
+ * doesn't work on GCC 12.
+ */
+
+#if XXH_HAS_BUILTIN(__builtin_unreachable)
+#  define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+#  define XXH_UNREACHABLE() __assume(0)
+
+#else
+#  define XXH_UNREACHABLE()
+#endif
+
+#if XXH_HAS_BUILTIN(__builtin_assume)
+#  define XXH_ASSUME(c) __builtin_assume(c)
+#else
+#  define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
+#endif
+
+/*!
+ * @internal
+ * @def XXH_rotl32(x,r)
+ * @brief 32-bit rotate left.
+ *
+ * @param x The 32-bit integer to be rotated.
+ * @param r The number of bits to rotate.
+ * @pre
+ *   @p r > 0 && @p r < 32
+ * @note
+ *   @p x and @p r may be evaluated multiple times.
+ * @return The rotated result.
+ */
+#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
+                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
+#  define XXH_rotl32 __builtin_rotateleft32
+#  define XXH_rotl64 __builtin_rotateleft64
+/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
+#elif defined(_MSC_VER)
+#  define XXH_rotl32(x,r) _rotl(x,r)
+#  define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
+#endif
+
+/*!
+ * @internal
+ * @fn xxh_u32 XXH_swap32(xxh_u32 x)
+ * @brief A 32-bit byteswap.
+ *
+ * @param x The 32-bit integer to byteswap.
+ * @return @p x, byteswapped.
+ */
+#if defined(_MSC_VER) /* Visual Studio */
+#  define XXH_swap32 _byteswap_ulong
+#elif XXH_GCC_VERSION >= 403
+#  define XXH_swap32 __builtin_bswap32
+#else
+static xxh_u32 XXH_swap32 (xxh_u32 x)
+{
+    return ((x << 24) & 0xff000000 ) |
+           ((x <<  8) & 0x00ff0000 ) |
+           ((x >>  8) & 0x0000ff00 ) |
+           ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* ***************************
+*  Memory reads
+*****************************/
+
+/*!
+ * @internal
+ * @brief Enum to indicate whether a pointer is aligned.
+ */
+typedef enum {
+    XXH_aligned,  /*!< Aligned */
+    XXH_unaligned /*!< Possibly unaligned */
+} XXH_alignment;
+
+/*
+ * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
+ *
+ * This is ideal for older compilers which don't inline memcpy.
+ */ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) + +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] + | ((xxh_u32)bytePtr[1] << 8) + | ((xxh_u32)bytePtr[2] << 16) + | ((xxh_u32)bytePtr[3] << 24); +} + +XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[3] + | ((xxh_u32)bytePtr[2] << 8) + | ((xxh_u32)bytePtr[1] << 16) + | ((xxh_u32)bytePtr[0] << 24); +} + +#else +XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); +} + +static xxh_u32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u32 +XXH_readLE32_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) { + return XXH_readLE32(ptr); + } else { + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr); + } +} + + +/* ************************************* +* Misc +***************************************/ +/*! @ingroup public */ +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ******************************************************************* +* 32-bit hash functions +*********************************************************************/ +/*! + * @} + * @defgroup XXH32_impl XXH32 implementation + * @ingroup impl + * + * Details on the XXH32 implementation. + * @{ + */ + /* #define instead of static const, to be used as initializers */ +#define XXH_PRIME32_1 0x9E3779B1U /*!< 0b10011110001101110111100110110001 */ +#define XXH_PRIME32_2 0x85EBCA77U /*!< 0b10000101111010111100101001110111 */ +#define XXH_PRIME32_3 0xC2B2AE3DU /*!< 0b11000010101100101010111000111101 */ +#define XXH_PRIME32_4 0x27D4EB2FU /*!< 0b00100111110101001110101100101111 */ +#define XXH_PRIME32_5 0x165667B1U /*!< 0b00010110010101100110011110110001 */ + +#ifdef XXH_OLD_NAMES +# define PRIME32_1 XXH_PRIME32_1 +# define PRIME32_2 XXH_PRIME32_2 +# define PRIME32_3 XXH_PRIME32_3 +# define PRIME32_4 XXH_PRIME32_4 +# define PRIME32_5 XXH_PRIME32_5 +#endif + +/*! + * @internal + * @brief Normal stripe processing routine. + * + * This shuffles the bits so that any bit from @p input impacts several bits in + * @p acc. + * + * @param acc The accumulator lane. + * @param input The stripe of input to mix. + * @return The mixed accumulator lane. + */ +static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input) +{ + acc += input * XXH_PRIME32_2; + acc = XXH_rotl32(acc, 13); + acc *= XXH_PRIME32_1; +#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE) + /* + * UGLY HACK: + * A compiler fence is the only thing that prevents GCC and Clang from + * autovectorizing the XXH32 loop (pragmas and attributes don't work for some + * reason) without globally disabling SSE4.1. + * + * The reason we want to avoid vectorization is because despite working on + * 4 integers at a time, there are multiple factors slowing XXH32 down on + * SSE4: + * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on + * newer chips!) making it slightly slower to multiply four integers at + * once compared to four integers independently. Even when pmulld was + * fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE + * just to multiply unless doing a long operation. 
+ *
+ *  - Four instructions are required to rotate,
+ *      movdqa tmp,  v // not required with VEX encoding
+ *      pslld  tmp, 13 // tmp <<= 13
+ *      psrld  v,   19 // v >>= 19
+ *      por    v,  tmp // v |= tmp
+ *    compared to one for scalar:
+ *      roll   v, 13    // reliably fast across the board
+ *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
+ *
+ *  - Instruction level parallelism is actually more beneficial here because
+ *    the SIMD actually serializes this operation: While v1 is rotating, v2
+ *    can load data, while v3 can multiply. SSE forces them to operate
+ *    together.
+ *
+ * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
+ * the loop. NEON is only faster on the A53, and with the newer cores, it is less
+ * than half the speed.
+ *
+ * Additionally, this is used on WASM SIMD128 because it JITs to the same
+ * SIMD instructions and has the same issue.
+ */
+    XXH_COMPILER_GUARD(acc);
+#endif
+    return acc;
+}
+
+/*!
+ * @internal
+ * @brief Mixes all bits to finalize the hash.
+ *
+ * The final mix ensures that all input bits have a chance to impact any bit in
+ * the output digest, resulting in an unbiased distribution.
+ *
+ * @param hash The hash to avalanche.
+ * @return The avalanched hash.
+ */
+static xxh_u32 XXH32_avalanche(xxh_u32 hash)
+{
+    hash ^= hash >> 15;
+    hash *= XXH_PRIME32_2;
+    hash ^= hash >> 13;
+    hash *= XXH_PRIME32_3;
+    hash ^= hash >> 16;
+    return hash;
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-15 bytes of @p ptr.
+ *
+ * There may be up to 15 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 16.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ * @see XXH64_finalize().
+ */ +static XXH_PUREF xxh_u32 +XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align) +{ +#define XXH_PROCESS1 do { \ + hash += (*ptr++) * XXH_PRIME32_5; \ + hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \ +} while (0) + +#define XXH_PROCESS4 do { \ + hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \ + ptr += 4; \ + hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \ +} while (0) + + if (ptr==NULL) XXH_ASSERT(len == 0); + + /* Compact rerolled version; generally faster */ + if (!XXH32_ENDJMP) { + len &= 15; + while (len >= 4) { + XXH_PROCESS4; + len -= 4; + } + while (len > 0) { + XXH_PROCESS1; + --len; + } + return XXH32_avalanche(hash); + } else { + switch(len&15) /* or switch(bEnd - p) */ { + case 12: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 8: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 4: XXH_PROCESS4; + return XXH32_avalanche(hash); + + case 13: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 9: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 5: XXH_PROCESS4; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 14: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 10: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 6: XXH_PROCESS4; + XXH_PROCESS1; + XXH_PROCESS1; + return XXH32_avalanche(hash); + + case 15: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 11: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 7: XXH_PROCESS4; + XXH_FALLTHROUGH; /* fallthrough */ + case 3: XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 2: XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 1: XXH_PROCESS1; + XXH_FALLTHROUGH; /* fallthrough */ + case 0: return XXH32_avalanche(hash); + } + XXH_ASSERT(0); + return hash; /* reaching this point is deemed impossible */ + } +} + +#ifdef XXH_OLD_NAMES +# define PROCESS1 XXH_PROCESS1 +# define PROCESS4 XXH_PROCESS4 +#else +# undef XXH_PROCESS1 +# undef XXH_PROCESS4 +#endif + +/*! + * @internal + * @brief The implementation for @ref XXH32(). + * + * @param input , len , seed Directly passed from @ref XXH32(). + * @param align Whether @p input is aligned. + * @return The calculated hash. + */ +XXH_FORCE_INLINE XXH_PUREF xxh_u32 +XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align) +{ + xxh_u32 h32; + + if (input==NULL) XXH_ASSERT(len == 0); + + if (len>=16) { + const xxh_u8* const bEnd = input + len; + const xxh_u8* const limit = bEnd - 15; + xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + xxh_u32 v2 = seed + XXH_PRIME32_2; + xxh_u32 v3 = seed + 0; + xxh_u32 v4 = seed - XXH_PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(input)); input += 4; + v2 = XXH32_round(v2, XXH_get32bits(input)); input += 4; + v3 = XXH32_round(v3, XXH_get32bits(input)); input += 4; + v4 = XXH32_round(v4, XXH_get32bits(input)); input += 4; + } while (input < limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + XXH_PRIME32_5; + } + + h32 += (xxh_u32)len; + + return XXH32_finalize(h32, input, len&15, align); +} + +/*! 
@ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed) +{ +#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH32_state_t state; + XXH32_reset(&state, seed); + XXH32_update(&state, (const xxh_u8*)input, len); + return XXH32_digest(&state); +#else + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned); + } } + + return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned); +#endif +} + + + +/******* Hash streaming *******/ +#ifndef XXH_NO_STREAM +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState) +{ + XXH_memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed) +{ + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2; + statePtr->v[1] = seed + XXH_PRIME32_2; + statePtr->v[2] = seed + 0; + statePtr->v[3] = seed - XXH_PRIME32_1; + return XXH_OK; +} + + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH_errorcode +XXH32_update(XXH32_state_t* state, const void* input, size_t len) +{ + if (input==NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len_32 += (XXH32_hash_t)len; + state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16)); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, len); + state->memsize += (XXH32_hash_t)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const xxh_u32* p32 = state->mem32; + state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++; + state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++; + state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++; + state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32)); + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const xxh_u8* const limit = bEnd - 16; + + do { + state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4; + state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4; + state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4; + state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4; + } while (p<=limit); + + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +/*! 
@ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state) +{ + xxh_u32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v[0], 1) + + XXH_rotl32(state->v[1], 7) + + XXH_rotl32(state->v[2], 12) + + XXH_rotl32(state->v[3], 18); + } else { + h32 = state->v[2] /* == seed */ + XXH_PRIME32_5; + } + + h32 += state->total_len_32; + + return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned); +} +#endif /* !XXH_NO_STREAM */ + +/******* Canonical representation *******/ + +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); +} +/*! @ingroup XXH32_family */ +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + + +#ifndef XXH_NO_LONG_LONG + +/* ******************************************************************* +* 64-bit hash functions +*********************************************************************/ +/*! + * @} + * @ingroup impl + * @{ + */ +/******* Memory access *******/ + +typedef XXH64_hash_t xxh_u64; + +#ifdef XXH_OLD_NAMES +# define U64 xxh_u64 +#endif + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) +/* + * Manual byteshift. Best for old compilers which don't inline memcpy. + * We actually directly use XXH_readLE64 and XXH_readBE64. + */ +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ +static xxh_u64 XXH_read64(const void* memPtr) +{ + return *(const xxh_u64*) memPtr; +} + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* + * __attribute__((aligned(1))) is supported by gcc and clang. Originally the + * documentation claimed that it only increased the alignment, but actually it + * can decrease it on gcc, clang, and icc: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502, + * https://gcc.godbolt.org/z/xYez1j67Y. + */ +#ifdef XXH_OLD_NAMES +typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64; +#endif +static xxh_u64 XXH_read64(const void* ptr) +{ + typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64; + return *((const xxh_unalign64*)ptr); +} + +#else + +/* + * Portable and safe solution. Generally efficient. + * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html + */ +static xxh_u64 XXH_read64(const void* memPtr) +{ + xxh_u64 val; + XXH_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap64 _byteswap_uint64 +#elif XXH_GCC_VERSION >= 403 +# define XXH_swap64 __builtin_bswap64 +#else +static xxh_u64 XXH_swap64(xxh_u64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. 
*/ +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3)) + +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[0] + | ((xxh_u64)bytePtr[1] << 8) + | ((xxh_u64)bytePtr[2] << 16) + | ((xxh_u64)bytePtr[3] << 24) + | ((xxh_u64)bytePtr[4] << 32) + | ((xxh_u64)bytePtr[5] << 40) + | ((xxh_u64)bytePtr[6] << 48) + | ((xxh_u64)bytePtr[7] << 56); +} + +XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr) +{ + const xxh_u8* bytePtr = (const xxh_u8 *)memPtr; + return bytePtr[7] + | ((xxh_u64)bytePtr[6] << 8) + | ((xxh_u64)bytePtr[5] << 16) + | ((xxh_u64)bytePtr[4] << 24) + | ((xxh_u64)bytePtr[3] << 32) + | ((xxh_u64)bytePtr[2] << 40) + | ((xxh_u64)bytePtr[1] << 48) + | ((xxh_u64)bytePtr[0] << 56); +} + +#else +XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); +} + +static xxh_u64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} +#endif + +XXH_FORCE_INLINE xxh_u64 +XXH_readLE64_align(const void* ptr, XXH_alignment align) +{ + if (align==XXH_unaligned) + return XXH_readLE64(ptr); + else + return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr); +} + + +/******* xxh64 *******/ +/*! + * @} + * @defgroup XXH64_impl XXH64 implementation + * @ingroup impl + * + * Details on the XXH64 implementation. + * @{ + */ +/* #define rather that static const, to be used as initializers */ +#define XXH_PRIME64_1 0x9E3779B185EBCA87ULL /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */ +#define XXH_PRIME64_2 0xC2B2AE3D27D4EB4FULL /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */ +#define XXH_PRIME64_3 0x165667B19E3779F9ULL /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */ +#define XXH_PRIME64_4 0x85EBCA77C2B2AE63ULL /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */ +#define XXH_PRIME64_5 0x27D4EB2F165667C5ULL /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */ + +#ifdef XXH_OLD_NAMES +# define PRIME64_1 XXH_PRIME64_1 +# define PRIME64_2 XXH_PRIME64_2 +# define PRIME64_3 XXH_PRIME64_3 +# define PRIME64_4 XXH_PRIME64_4 +# define PRIME64_5 XXH_PRIME64_5 +#endif + +/*! @copydoc XXH32_round */ +static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input) +{ + acc += input * XXH_PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= XXH_PRIME64_1; +#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE) + /* + * DISABLE AUTOVECTORIZATION: + * A compiler fence is used to prevent GCC and Clang from + * autovectorizing the XXH64 loop (pragmas and attributes don't work for some + * reason) without globally disabling AVX512. + * + * Autovectorization of XXH64 tends to be detrimental, + * though the exact outcome may change depending on exact cpu and compiler version. + * For information, it has been reported as detrimental for Skylake-X, + * but possibly beneficial for Zen4. + * + * The default is to disable auto-vectorization, + * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable. + */ + XXH_COMPILER_GUARD(acc); +#endif + return acc; +} + +static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * XXH_PRIME64_1 + XXH_PRIME64_4; + return acc; +} + +/*! 
@copydoc XXH32_avalanche */
+static xxh_u64 XXH64_avalanche(xxh_u64 hash)
+{
+    hash ^= hash >> 33;
+    hash *= XXH_PRIME64_2;
+    hash ^= hash >> 29;
+    hash *= XXH_PRIME64_3;
+    hash ^= hash >> 32;
+    return hash;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
+
+/*!
+ * @internal
+ * @brief Processes the last 0-31 bytes of @p ptr.
+ *
+ * There may be up to 31 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 32.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash.
+ * @see XXH32_finalize().
+ */
+static XXH_PUREF xxh_u64
+XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
+{
+    if (ptr==NULL) XXH_ASSERT(len == 0);
+    len &= 31;
+    while (len >= 8) {
+        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
+        ptr += 8;
+        hash ^= k1;
+        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+        len -= 8;
+    }
+    if (len >= 4) {
+        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+        ptr += 4;
+        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+        len -= 4;
+    }
+    while (len > 0) {
+        hash ^= (*ptr++) * XXH_PRIME64_5;
+        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
+        --len;
+    }
+    return XXH64_avalanche(hash);
+}
+
+#ifdef XXH_OLD_NAMES
+#  define PROCESS1_64 XXH_PROCESS1_64
+#  define PROCESS4_64 XXH_PROCESS4_64
+#  define PROCESS8_64 XXH_PROCESS8_64
+#else
+#  undef XXH_PROCESS1_64
+#  undef XXH_PROCESS4_64
+#  undef XXH_PROCESS8_64
+#endif
+
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH64().
+ *
+ * @param input , len , seed Directly passed from @ref XXH64().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
+XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
+{
+    xxh_u64 h64;
+    if (input==NULL) XXH_ASSERT(len == 0);
+
+    if (len>=32) {
+        const xxh_u8* const bEnd = input + len;
+        const xxh_u8* const limit = bEnd - 31;
+        xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
+        xxh_u64 v2 = seed + XXH_PRIME64_2;
+        xxh_u64 v3 = seed + 0;
+        xxh_u64 v4 = seed - XXH_PRIME64_1;
+
+        do {
+            v1 = XXH64_round(v1, XXH_get64bits(input)); input+=8;
+            v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
+            v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
+            v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
+        } while (input<limit);
+
+        h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7)
+            + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+        h64 = XXH64_mergeRound(h64, v1);
+        h64 = XXH64_mergeRound(h64, v2);
+        h64 = XXH64_mergeRound(h64, v3);
+        h64 = XXH64_mergeRound(h64, v4);
+
+    } else {
+        h64 = seed + XXH_PRIME64_5;
+    }
+
+    h64 += (xxh_u64) len;
+
+    return XXH64_finalize(h64, input, len, align);
+}
+
+
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
+    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+    XXH64_state_t state;
+    XXH64_reset(&state, seed);
+    XXH64_update(&state, (const xxh_u8*)input, len);
+    return XXH64_digest(&state);
+#else
+    if (XXH_FORCE_ALIGN_CHECK) {
+        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
+            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
+    }   }
+
+    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
+
+#endif
+}
+
+/******* Hash Streaming *******/
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH64_family*/
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+    XXH_free(statePtr);
+    return XXH_OK;
+}
+
+/*!
@ingroup XXH64_family */ +XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState) +{ + XXH_memcpy(dstState, srcState, sizeof(*dstState)); +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed) +{ + XXH_ASSERT(statePtr != NULL); + memset(statePtr, 0, sizeof(*statePtr)); + statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2; + statePtr->v[1] = seed + XXH_PRIME64_2; + statePtr->v[2] = seed + 0; + statePtr->v[3] = seed - XXH_PRIME64_1; + return XXH_OK; +} + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH_errorcode +XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len) +{ + if (input==NULL) { + XXH_ASSERT(len == 0); + return XXH_OK; + } + + { const xxh_u8* p = (const xxh_u8*)input; + const xxh_u8* const bEnd = p + len; + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, len); + state->memsize += (xxh_u32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0)); + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1)); + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2)); + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3)); + p += 32 - state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const xxh_u8* const limit = bEnd - 32; + + do { + state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8; + state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8; + state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8; + state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8; + } while (p<=limit); + + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + } + + return XXH_OK; +} + + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state) +{ + xxh_u64 h64; + + if (state->total_len >= 32) { + h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18); + h64 = XXH64_mergeRound(h64, state->v[0]); + h64 = XXH64_mergeRound(h64, state->v[1]); + h64 = XXH64_mergeRound(h64, state->v[2]); + h64 = XXH64_mergeRound(h64, state->v[3]); + } else { + h64 = state->v[2] /*seed*/ + XXH_PRIME64_5; + } + + h64 += (xxh_u64) state->total_len; + + return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned); +} +#endif /* !XXH_NO_STREAM */ + +/******* Canonical representation *******/ + +/*! @ingroup XXH64_family */ +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + XXH_memcpy(dst, &hash, sizeof(*dst)); +} + +/*! 
@ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
+{
+    return XXH_readBE64(src);
+}
+
+#ifndef XXH_NO_XXH3
+
+/* *********************************************************************
+*  XXH3
+*  New generation hash designed for speed on small keys and vectorization
+************************************************************************ */
+/*!
+ * @}
+ * @defgroup XXH3_impl XXH3 implementation
+ * @ingroup impl
+ * @{
+ */
+
+/* ===   Compiler specifics   === */
+
+#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
+#  define XXH_RESTRICT /* disable */
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* >= C99 */
+#  define XXH_RESTRICT   restrict
+#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
+   || (defined (__clang__)) \
+   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
+   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
+/*
+ * There are a LOT more compilers that recognize __restrict but this
+ * covers the major ones.
+ */
+#  define XXH_RESTRICT   __restrict
+#else
+#  define XXH_RESTRICT   /* disable */
+#endif
+
+#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
+  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
+  || defined(__clang__)
+#  define XXH_likely(x) __builtin_expect(x, 1)
+#  define XXH_unlikely(x) __builtin_expect(x, 0)
+#else
+#  define XXH_likely(x) (x)
+#  define XXH_unlikely(x) (x)
+#endif
+
+#ifndef XXH_HAS_INCLUDE
+#  ifdef __has_include
+/*
+ * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
+ * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
+ */
+#    define XXH_HAS_INCLUDE __has_include
+#  else
+#    define XXH_HAS_INCLUDE(x) 0
+#  endif
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#  if defined(__ARM_FEATURE_SVE)
+#    include <arm_sve.h>
+#  endif
+#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
+   || (defined(_M_ARM) && _M_ARM >= 7) \
+   || defined(_M_ARM64) || defined(_M_ARM64EC) \
+   || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
+#    define inline __inline__ /* circumvent a clang bug */
+#    include <arm_neon.h>
+#    undef inline
+#  elif defined(__AVX2__)
+#    include <immintrin.h>
+#  elif defined(__SSE2__)
+#    include <emmintrin.h>
+#  endif
+#endif
+
+#if defined(_MSC_VER)
+#  include <intrin.h>
+#endif
+
+/*
+ * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
+ * remaining a true 64-bit/128-bit hash function.
+ *
+ * This is done by prioritizing a subset of 64-bit operations that can be
+ * emulated without too many steps on the average 32-bit machine.
+ *
+ * For example, these two lines seem similar, and run equally fast on 64-bit:
+ *
+ *   xxh_u64 x;
+ *   x ^= (x >> 47); // good
+ *   x ^= (x >> 13); // bad
+ *
+ * However, to a 32-bit machine, there is a major difference.
+ *
+ * x ^= (x >> 47) looks like this:
+ *
+ *   x.lo ^= (x.hi >> (47 - 32));
+ *
+ * while x ^= (x >> 13) looks like this:
+ *
+ *   // note: funnel shifts are not usually cheap.
+ *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
+ *   x.hi ^= (x.hi >> 13);
+ *
+ * The first one is significantly faster than the second, simply because the
+ * shift is larger than 32. This means:
+ *  - All the bits we need are in the upper 32 bits, so we can ignore the lower
+ *    32 bits in the shift.
+ *  - The shift result will always fit in the lower 32 bits, and therefore,
+ *    we can ignore the upper 32 bits in the xor.
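+ *
+ * As a compilable restatement of the pseudocode above (illustrative only;
+ * xxHash does not define this emulation type):
+ *
+ *   typedef struct { xxh_u32 lo, hi; } xxh_u64_emu; // hypothetical
+ *   // x ^= (x >> 47): one 32-bit shift and one xor; the hi half is untouched
+ *   static void xorshift47(xxh_u64_emu* x) { x->lo ^= (x->hi >> (47 - 32)); }
+ *   // x ^= (x >> 13): a funnel shift across both halves is required
+ *   static void xorshift13(xxh_u64_emu* x)
+ *   {
+ *       x->lo ^= (x->lo >> 13) | (x->hi << (32 - 13));
+ *       x->hi ^= (x->hi >> 13);
+ *   }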
+ * + * Thanks to this optimization, XXH3 only requires these features to be efficient: + * + * - Usable unaligned access + * - A 32-bit or 64-bit ALU + * - If 32-bit, a decent ADC instruction + * - A 32 or 64-bit multiply with a 64-bit result + * - For the 128-bit variant, a decent byteswap helps short inputs. + * + * The first two are already required by XXH32, and almost all 32-bit and 64-bit + * platforms which can run XXH32 can run XXH3 efficiently. + * + * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one + * notable exception. + * + * First of all, Thumb-1 lacks support for the UMULL instruction which + * performs the important long multiply. This means numerous __aeabi_lmul + * calls. + * + * Second of all, the 8 functional registers are just not enough. + * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need + * Lo registers, and this shuffling results in thousands more MOVs than A32. + * + * A32 and T32 don't have this limitation. They can access all 14 registers, + * do a 32->64 multiply with UMULL, and the flexible operand allowing free + * shifts is helpful, too. + * + * Therefore, we do a quick sanity check. + * + * If compiling Thumb-1 for a target which supports ARM instructions, we will + * emit a warning, as it is not a "sane" platform to compile for. + * + * Usually, if this happens, it is because of an accident and you probably need + * to specify -march, as you likely meant to compile for a newer architecture. + * + * Credit: large sections of the vectorial and asm source code paths + * have been contributed by @easyaspi314 + */ +#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM) +# warning "XXH3 is highly inefficient without ARM or Thumb-2." +#endif + +/* ========================================== + * Vectorization detection + * ========================================== */ + +#ifdef XXH_DOXYGEN +/*! + * @ingroup tuning + * @brief Overrides the vectorization implementation chosen for XXH3. + * + * Can be defined to 0 to disable SIMD or any of the values mentioned in + * @ref XXH_VECTOR_TYPE. + * + * If this is not defined, it uses predefined macros to determine the best + * implementation. + */ +# define XXH_VECTOR XXH_SCALAR +/*! + * @ingroup tuning + * @brief Possible values for @ref XXH_VECTOR. + * + * Note that these are actually implemented as macros. + * + * If this is not defined, it is detected automatically. + * internal macro XXH_X86DISPATCH overrides this. + */ +enum XXH_VECTOR_TYPE /* fake enum */ { + XXH_SCALAR = 0, /*!< Portable scalar version */ + XXH_SSE2 = 1, /*!< + * SSE2 for Pentium 4, Opteron, all x86_64. + * + * @note SSE2 is also guaranteed on Windows 10, macOS, and + * Android x86. + */ + XXH_AVX2 = 2, /*!< AVX2 for Haswell and Bulldozer */ + XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */ + XXH_NEON = 4, /*!< + * NEON for most ARMv7-A, all AArch64, and WASM SIMD128 + * via the SIMDeverywhere polyfill provided with the + * Emscripten SDK. + */ + XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */ + XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */ +}; +/*! + * @ingroup tuning + * @brief Selects the minimum alignment for XXH3's accumulators. + * + * When using SIMD, this should match the alignment required for said vector + * type, so, for example, 32 for AVX2. + * + * Default: Auto detected. 
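+ *
+ * For example, when @ref XXH_VECTOR is XXH_AVX2, the auto-detection below
+ * selects a 32-byte alignment to match AVX2 vector loads.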
+ */
+#  define XXH_ACC_ALIGN 8
+#endif
+
+/* Actual definition */
+#ifndef XXH_DOXYGEN
+#  define XXH_SCALAR 0
+#  define XXH_SSE2   1
+#  define XXH_AVX2   2
+#  define XXH_AVX512 3
+#  define XXH_NEON   4
+#  define XXH_VSX    5
+#  define XXH_SVE    6
+#endif
+
+#ifndef XXH_VECTOR    /* can be defined on command line */
+#  if defined(__ARM_FEATURE_SVE)
+#    define XXH_VECTOR XXH_SVE
+#  elif ( \
+        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
+     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+     || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
+   ) && ( \
+        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
+    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
+   )
+#    define XXH_VECTOR XXH_NEON
+#  elif defined(__AVX512F__)
+#    define XXH_VECTOR XXH_AVX512
+#  elif defined(__AVX2__)
+#    define XXH_VECTOR XXH_AVX2
+#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
+#    define XXH_VECTOR XXH_SSE2
+#  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
+     || (defined(__s390x__) && defined(__VEC__)) \
+     && defined(__GNUC__) /* TODO: IBM XL */
+#    define XXH_VECTOR XXH_VSX
+#  else
+#    define XXH_VECTOR XXH_SCALAR
+#  endif
+#endif
+
+/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
+#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
+#  ifdef _MSC_VER
+#    pragma warning(once : 4606)
+#  else
+#    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
+#  endif
+#  undef XXH_VECTOR
+#  define XXH_VECTOR XXH_SCALAR
+#endif
+
+/*
+ * Controls the alignment of the accumulator,
+ * for compatibility with aligned vector loads, which are usually faster.
+ */
+#ifndef XXH_ACC_ALIGN
+#  if defined(XXH_X86DISPATCH)
+#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
+#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
+#     define XXH_ACC_ALIGN 8
+#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
+#     define XXH_ACC_ALIGN 16
+#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
+#     define XXH_ACC_ALIGN 32
+#  elif XXH_VECTOR == XXH_NEON  /* neon */
+#     define XXH_ACC_ALIGN 16
+#  elif XXH_VECTOR == XXH_VSX   /* vsx */
+#     define XXH_ACC_ALIGN 16
+#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
+#     define XXH_ACC_ALIGN 64
+#  elif XXH_VECTOR == XXH_SVE   /* sve */
+#     define XXH_ACC_ALIGN 64
+#  endif
+#endif
+
+#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
+    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
+#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#elif XXH_VECTOR == XXH_SVE
+#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#else
+#  define XXH_SEC_ALIGN 8
+#endif
+
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_ALIASING __attribute__((may_alias))
+#else
+#  define XXH_ALIASING /* nothing */
+#endif
+
+/*
+ * UGLY HACK:
+ * GCC usually generates the best code with -O3 for xxHash.
+ *
+ * However, when targeting AVX2, it is overzealous in its unrolling, resulting
+ * in code roughly 3/4 the speed of Clang.
+ *
+ * There are other issues, such as GCC splitting _mm256_loadu_si256 into
+ * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
+ * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
+ *
+ * That is why when compiling the AVX2 version, it is recommended to use either
+ *   -O2 -mavx2 -march=haswell
+ * or
+ *   -O2 -mavx2 -mno-avx256-split-unaligned-load
+ * for decent performance, or to use Clang instead.
+ * + * Fortunately, we can control the first one with a pragma that forces GCC into + * -O2, but the other one we can't control without "failed to inline always + * inline function due to target mismatch" warnings. + */ +#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \ + && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \ + && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */ +# pragma GCC push_options +# pragma GCC optimize("-O2") +#endif + +#if XXH_VECTOR == XXH_NEON + +/* + * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3 + * optimizes out the entire hashLong loop because of the aliasing violation. + * + * However, GCC is also inefficient at load-store optimization with vld1q/vst1q, + * so the only option is to mark it as aliasing. + */ +typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING; + +/*! + * @internal + * @brief `vld1q_u64` but faster and alignment-safe. + * + * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only + * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86). + * + * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it + * prohibits load-store optimizations. Therefore, a direct dereference is used. + * + * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe + * unaligned load. + */ +#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */ +{ + return *(xxh_aliasing_uint64x2_t const *)ptr; +} +#else +XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) +{ + return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr)); +} +#endif + +/*! + * @internal + * @brief `vmlal_u32` on low and high halves of a vector. + * + * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with + * inline assembly and were therefore incapable of merging the `vget_{low, high}_u32` + * with `vmlal_u32`. + */ +#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11 +XXH_FORCE_INLINE uint64x2_t +XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) +{ + /* Inline assembly is the only way */ + __asm__("umlal %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs)); + return acc; +} +XXH_FORCE_INLINE uint64x2_t +XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) +{ + /* This intrinsic works as expected */ + return vmlal_high_u32(acc, lhs, rhs); +} +#else +/* Portable intrinsic versions */ +XXH_FORCE_INLINE uint64x2_t +XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) +{ + return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs)); +} +/*! @copydoc XXH_vmlal_low_u32 + * Assume the compiler converts this to vmlal_high_u32 on aarch64 */ +XXH_FORCE_INLINE uint64x2_t +XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs) +{ + return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs)); +} +#endif + +/*! + * @ingroup tuning + * @brief Controls the NEON to scalar ratio for XXH3 + * + * This can be set to 2, 4, 6, or 8. + * + * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used. + * + * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those + * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU + * bandwidth. 
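+ * (Concretely: with a pure-NEON loop, one of the A73's three dispatch slots
+ * sits idle every cycle, while a 6:2 split lets the scalar pipes absorb it.)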
+ *
+ * This is even more noticeable on the more advanced cores like the Cortex-A76 which
+ * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
+ *
+ * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
+ * and 2 scalar lanes, which is chosen by default.
+ *
+ * This does not apply to Apple processors or 32-bit processors, which run better with
+ * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
+ *
+ * This change benefits CPUs with large micro-op buffers without negatively affecting
+ * most other CPUs:
+ *
+ *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
+ *  |:----------------------|:--------------------|----------:|-----------:|------:|
+ *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s | 10.1 GB/s  |  ~16% |
+ *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |  5.3 GB/s  |   ~5% |
+ *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |  1.9 GB/s  |    0% |
+ *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s | 36.1 GB/s  |  ~-3% |
+ *
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
+ *
+ * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes,
+ * meaning it effectively becomes a slower version of 4.
+ *
+ * @see XXH3_accumulate_512_neon()
+ */
+# ifndef XXH3_NEON_LANES
+#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
+   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
+#   define XXH3_NEON_LANES 6
+#  else
+#   define XXH3_NEON_LANES XXH_ACC_NB
+#  endif
+# endif
+#endif  /* XXH_VECTOR == XXH_NEON */
+
+/*
+ * VSX and Z Vector helpers.
+ *
+ * This is very messy, and any pull requests to clean this up are welcome.
+ *
+ * There are a lot of problems with supporting VSX and s390x, due to
+ * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
+ */
+#if XXH_VECTOR == XXH_VSX
+/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+#  pragma push_macro("bool")
+#  pragma push_macro("vector")
+#  pragma push_macro("pixel")
+/* silence potential macro redefined warnings */
+#  undef bool
+#  undef vector
+#  undef pixel
+
+#  if defined(__s390x__)
+#    include <s390intrin.h>
+#  else
+#    include <altivec.h>
+#  endif
+
+/* Restore the original macro values, if applicable. */
+#  pragma pop_macro("pixel")
+#  pragma pop_macro("vector")
+#  pragma pop_macro("bool")
+
+typedef __vector unsigned long long xxh_u64x2;
+typedef __vector unsigned char xxh_u8x16;
+typedef __vector unsigned xxh_u32x4;
+
+/*
+ * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
+ */
+typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
+
+# ifndef XXH_VSX_BE
+#  if defined(__BIG_ENDIAN__) \
+  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#    define XXH_VSX_BE 1
+#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+#    warning "-maltivec=be is not recommended. Please use native endianness."
+#    define XXH_VSX_BE 1
+#  else
+#    define XXH_VSX_BE 0
+#  endif
+# endif /* !defined(XXH_VSX_BE) */
+
+# if XXH_VSX_BE
+#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
+#    define XXH_vec_revb vec_revb
+#  else
+/*!
+ * A polyfill for POWER9's vec_revb().
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
+{
+    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
+    return vec_perm(val, val, vByteSwap);
+}
+# endif
+# endif /* XXH_VSX_BE */
+
+/*!
+ * Performs an unaligned vector load and byte swaps it on big endian.
+ */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
+{
+    xxh_u64x2 ret;
+    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
+# if XXH_VSX_BE
+    ret = XXH_vec_revb(ret);
+# endif
+    return ret;
+}
+
+/*
+ * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
+ *
+ * These intrinsics weren't added until GCC 8, despite existing for a while,
+ * and they are endian dependent. Also, their meanings swap depending on the
+ * version.
+ * */
+# if defined(__s390x__)
+ /* s390x is always big endian, no issue on this platform */
+#  define XXH_vec_mulo vec_mulo
+#  define XXH_vec_mule vec_mule
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
+/* Clang has a better way to control this: we can just use the builtin, which doesn't swap. */
+ /* The IBM XL Compiler (which defines __clang__) only implements the vec_* operations */
+#  define XXH_vec_mulo __builtin_altivec_vmulouw
+#  define XXH_vec_mule __builtin_altivec_vmuleuw
+# else
+/* gcc needs inline assembly */
+/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 result;
+    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
+    return result;
+}
+XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
+{
+    xxh_u64x2 result;
+    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
+    return result;
+}
+# endif /* XXH_vec_mulo, XXH_vec_mule */
+#endif /* XXH_VECTOR == XXH_VSX */
+
+#if XXH_VECTOR == XXH_SVE
+#define ACCRND(acc, offset) \
+do { \
+    svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
+    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
+    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
+    svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
+    svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
+    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
+    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+    acc = svadd_u64_x(mask, acc, mul);                               \
+} while (0)
+#endif /* XXH_VECTOR == XXH_SVE */
+
+/* prefetch
+ * can be disabled by declaring the XXH_NO_PREFETCH build macro */
+#if defined(XXH_NO_PREFETCH)
+#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
+#else
+#  if XXH_SIZE_OPT >= 1
+#    define XXH_PREFETCH(ptr) (void)(ptr)
+#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
+#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
+#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
+#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
+#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
+#  else
+#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
+#  endif
+#endif  /* XXH_NO_PREFETCH */
+
+
+/* ==========================================
+ * XXH3 default settings
+ * ========================================== */
+
+#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
+
+#if (XXH_SECRET_DEFAULT_SIZE <
XXH3_SECRET_SIZE_MIN) +# error "default keyset is not large enough" +#endif + +/*! Pseudorandom secret taken directly from FARSH. */ +XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = { + 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, + 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, + 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, + 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, + 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, + 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, + 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, + 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, + 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, + 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, + 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, + 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, +}; + +static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL; /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */ +static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL; /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */ + +#ifdef XXH_OLD_NAMES +# define kSecret XXH3_kSecret +#endif + +#ifdef XXH_DOXYGEN +/*! + * @brief Calculates a 32-bit to 64-bit long multiply. + * + * Implemented as a macro. + * + * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't + * need to (but it shouldn't need to anyways, it is about 7 instructions to do + * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we + * use that instead of the normal method. + * + * If you are compiling for platforms like Thumb-1 and don't have a better option, + * you may also want to write your own long multiply routine here. + * + * @param x, y Numbers to be multiplied + * @return 64-bit product of the low 32 bits of @p x and @p y. + */ +XXH_FORCE_INLINE xxh_u64 +XXH_mult32to64(xxh_u64 x, xxh_u64 y) +{ + return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF); +} +#elif defined(_MSC_VER) && defined(_M_IX86) +# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y)) +#else +/* + * Downcast + upcast is usually better than masking on older compilers like + * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers. + * + * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands + * and perform a full 64x64 multiply -- entirely redundant on 32-bit. + */ +# define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y)) +#endif + +/*! + * @brief Calculates a 64->128-bit long multiply. + * + * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar + * version. + * + * @param lhs , rhs The 64-bit integers to be multiplied + * @return The 128-bit result represented in an @ref XXH128_hash_t. + */ +static XXH128_hash_t +XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs) +{ + /* + * GCC/Clang __uint128_t method. + * + * On most 64-bit targets, GCC and Clang define a __uint128_t type. 
+ * This is usually the best way as it typically uses a native long 64-bit
+ * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
+ *
+ * Usually.
+ *
+ * Despite being 32-bit platforms, wasm targets under Clang (and Emscripten)
+ * still define this type even though they lack the native arithmetic for it.
+ * This results in a laggy compiler builtin call which calculates a full
+ * 128-bit multiply. In that case it is best to use the portable one.
+ * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
+ */
+#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
+    && defined(__SIZEOF_INT128__) \
+    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+
+    __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
+    XXH128_hash_t r128;
+    r128.low64  = (xxh_u64)(product);
+    r128.high64 = (xxh_u64)(product >> 64);
+    return r128;
+
+    /*
+     * MSVC for x64's _umul128 method.
+     *
+     * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
+     *
+     * This compiles to a single-operand MUL on x64.
+     */
+#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
+
+#ifndef _MSC_VER
+#   pragma intrinsic(_umul128)
+#endif
+    xxh_u64 product_high;
+    xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
+    XXH128_hash_t r128;
+    r128.low64  = product_low;
+    r128.high64 = product_high;
+    return r128;
+
+    /*
+     * MSVC for ARM64's __umulh method.
+     *
+     * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
+     */
+#elif defined(_M_ARM64) || defined(_M_ARM64EC)
+
+#ifndef _MSC_VER
+#   pragma intrinsic(__umulh)
+#endif
+    XXH128_hash_t r128;
+    r128.low64  = lhs * rhs;
+    r128.high64 = __umulh(lhs, rhs);
+    return r128;
+
+#else
+    /*
+     * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
+     *
+     * This is a fast and simple grade school multiply, which is shown below
+     * with base 10 arithmetic instead of base 0x100000000.
+     *
+     *           9 3 // D2 lhs = 93
+     *         x 7 5 // D2 rhs = 75
+     *     ----------
+     *           1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
+     *         4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
+     *         2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
+     *       + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
+     *     ---------
+     *         2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
+     *       + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
+     *     ---------
+     *       6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
+     *
+     * The reasons for adding the products like this are:
+     *  1. It avoids manual carry tracking. Just like how
+     *     (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
+     *     This avoids a lot of complexity.
+     *
+     *  2. It hints for, and on Clang, compiles to, the powerful UMAAL
+     *     instruction available in ARM's Digital Signal Processing extension
+     *     in 32-bit ARMv6 and later, which is shown below:
+     *
+     *         void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
+     *         {
+     *             xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
+     *             *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
+     *             *RdHi = (xxh_u32)(product >> 32);
+     *         }
+     *
+     *     This instruction was designed for efficient long multiplication, and
+     *     allows this to be calculated in only 4 instructions at speeds
+     *     comparable to some 64-bit ALUs.
+     *
+     *  3. It isn't terrible on other platforms. Usually this will be a couple
+     *     of 32-bit ADD/ADCs.
+     */
+
+    /* First calculate all of the cross products.
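+     * In the base-10 diagram above, lo_lo, hi_lo, lo_hi and hi_hi below play
+     * the roles of the partial products 15, 45, 21 and 63, with 2^32 taking
+     * the place of 10.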
*/ + xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF); + xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32, rhs & 0xFFFFFFFF); + xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32); + xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32, rhs >> 32); + + /* Now add the products together. These will never overflow. */ + xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi; + xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32) + hi_hi; + xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF); + + XXH128_hash_t r128; + r128.low64 = lower; + r128.high64 = upper; + return r128; +#endif +} + +/*! + * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it. + * + * The reason for the separate function is to prevent passing too many structs + * around by value. This will hopefully inline the multiply, but we don't force it. + * + * @param lhs , rhs The 64-bit integers to multiply + * @return The low 64 bits of the product XOR'd by the high 64 bits. + * @see XXH_mult64to128() + */ +static xxh_u64 +XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs) +{ + XXH128_hash_t product = XXH_mult64to128(lhs, rhs); + return product.low64 ^ product.high64; +} + +/*! Seems to produce slightly better code on GCC for some reason. */ +XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift) +{ + XXH_ASSERT(0 <= shift && shift < 64); + return v64 ^ (v64 >> shift); +} + +/* + * This is a fast avalanche stage, + * suitable when input bits are already partially mixed + */ +static XXH64_hash_t XXH3_avalanche(xxh_u64 h64) +{ + h64 = XXH_xorshift64(h64, 37); + h64 *= PRIME_MX1; + h64 = XXH_xorshift64(h64, 32); + return h64; +} + +/* + * This is a stronger avalanche, + * inspired by Pelle Evensen's rrmxmx + * preferable when input has not been previously mixed + */ +static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len) +{ + /* this mix is inspired by Pelle Evensen's rrmxmx */ + h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24); + h64 *= PRIME_MX2; + h64 ^= (h64 >> 35) + len ; + h64 *= PRIME_MX2; + return XXH_xorshift64(h64, 28); +} + + +/* ========================================== + * Short keys + * ========================================== + * One of the shortcomings of XXH32 and XXH64 was that their performance was + * sub-optimal on short lengths. It used an iterative algorithm which strongly + * favored lengths that were a multiple of 4 or 8. + * + * Instead of iterating over individual inputs, we use a set of single shot + * functions which piece together a range of lengths and operate in constant time. + * + * Additionally, the number of multiplies has been significantly reduced. This + * reduces latency, especially when emulating 64-bit multiplies on 32-bit. + * + * Depending on the platform, this may or may not be faster than XXH32, but it + * is almost guaranteed to be faster than XXH64. + */ + +/* + * At very short lengths, there isn't enough input to fully hide secrets, or use + * the entire secret. + * + * There is also only a limited amount of mixing we can do before significantly + * impacting performance. + * + * Therefore, we use different sections of the secret and always mix two secret + * samples with an XOR. This should have no effect on performance on the + * seedless or withSeed variants because everything _should_ be constant folded + * by modern compilers. + * + * The XOR mixing hides individual parts of the secret and increases entropy. + * + * This adds an extra layer of strength for custom secrets. 
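+ *
+ * For example, in the functions below, lengths 1-3 mix the secret at byte
+ * offsets 0 and 4, lengths 4-8 use offsets 8 and 16, and lengths 9-16 use
+ * offsets 24/32 and 40/48, so no two length classes share a secret sample.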
+ */ +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combined = { input[0], 0x01, input[0], input[0] } + * len = 2: combined = { input[1], 0x02, input[0], input[1] } + * len = 3: combined = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const keyed = (xxh_u64)combined ^ bitflip; + return XXH64_avalanche(keyed); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input1 = XXH_readLE32(input); + xxh_u32 const input2 = XXH_readLE32(input + len - 4); + xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed; + xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32); + xxh_u64 const keyed = input64 ^ bitflip; + return XXH3_rrmxmx(keyed, len); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed; + xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed; + xxh_u64 const input_lo = XXH_readLE64(input) ^ bitflip1; + xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2; + xxh_u64 const acc = len + + XXH_swap64(input_lo) + input_hi + + XXH3_mul128_fold64(input_lo, input_hi); + return XXH3_avalanche(acc); + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t +XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed); + if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed); + if (len) return XXH3_len_1to3_64b(input, len, secret, seed); + return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64))); + } +} + +/* + * DISCLAIMER: There are known *seed-dependent* multicollisions here due to + * multiplication by zero, affecting hashes of lengths 17 to 240. + * + * However, they are very unlikely. + * + * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all + * unseeded non-cryptographic hashes, it does not attempt to defend itself + * against specially crafted inputs, only random inputs. + * + * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes + * cancelling out the secret is taken an arbitrary number of times (addressed + * in XXH3_accumulate_512), this collision is very unlikely with random inputs + * and/or proper seeding: + * + * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a + * function that is only called up to 16 times per hash with up to 240 bytes of + * input. 
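+ *
+ * As a back-of-the-envelope bound: 16 calls, each with a 1 in 2^63 chance,
+ * still leaves only about 16 / 2^63 = 2^-59 odds of a cancellation per hash.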
+ *
+ * This is not too bad for a non-cryptographic hash function, especially with
+ * only 64-bit outputs.
+ *
+ * The 128-bit variant (which trades some speed for strength) is NOT affected
+ * by this, although it is always a good idea to use a proper seed if you care
+ * about strength.
+ */
+XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
+                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
+{
+#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
+  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
+  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
+    /*
+     * UGLY HACK:
+     * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
+     * slower code.
+     *
+     * By forcing seed64 into a register, we disrupt the cost model and
+     * cause it to scalarize. See `XXH32_round()`
+     *
+     * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
+     * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
+     * GCC 9.2, despite both emitting scalar code.
+     *
+     * GCC generates much better scalar code than Clang for the rest of XXH3,
+     * which is why finding a more optimal codepath is of interest.
+     */
+    XXH_COMPILER_GUARD(seed64);
+#endif
+    {   xxh_u64 const input_lo = XXH_readLE64(input);
+        xxh_u64 const input_hi = XXH_readLE64(input+8);
+        return XXH3_mul128_fold64(
+            input_lo ^ (XXH_readLE64(secret)   + seed64),
+            input_hi ^ (XXH_readLE64(secret+8) - seed64)
+        );
+    }
+}
+
+/* For mid-range keys, XXH3 uses a Mum-hash variant. */
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                     XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(16 < len && len <= 128);
+
+    {   xxh_u64 acc = len * XXH_PRIME64_1;
+#if XXH_SIZE_OPT >= 1
+        /* Smaller and cleaner, but slightly slower. */
+        unsigned int i = (unsigned int)(len - 1) / 32;
+        do {
+            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
+            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
+        } while (i-- != 0);
+#else
+        if (len > 32) {
+            if (len > 64) {
+                if (len > 96) {
+                    acc += XXH3_mix16B(input+48, secret+96, seed);
+                    acc += XXH3_mix16B(input+len-64, secret+112, seed);
+                }
+                acc += XXH3_mix16B(input+32, secret+64, seed);
+                acc += XXH3_mix16B(input+len-48, secret+80, seed);
+            }
+            acc += XXH3_mix16B(input+16, secret+32, seed);
+            acc += XXH3_mix16B(input+len-32, secret+48, seed);
+        }
+        acc += XXH3_mix16B(input+0, secret+0, seed);
+        acc += XXH3_mix16B(input+len-16, secret+16, seed);
+#endif
+        return XXH3_avalanche(acc);
+    }
+}
+
+/*!
+ * @brief Maximum size of "short" key in bytes.
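+ *
+ * Inputs of 0-16 and 17-128 bytes use the single-shot paths above;
+ * 129-240 bytes use XXH3_len_129to240_64b() below, and anything longer
+ * falls through to the striped long-key code.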
+ */
+#define XXH3_MIDSIZE_MAX 240
+
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
+XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                      XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    #define XXH3_MIDSIZE_STARTOFFSET 3
+    #define XXH3_MIDSIZE_LASTOFFSET  17
+
+    {   xxh_u64 acc = len * XXH_PRIME64_1;
+        xxh_u64 acc_end;
+        unsigned int const nbRounds = (unsigned int)len / 16;
+        unsigned int i;
+        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+        for (i=0; i<8; i++) {
+            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
+        }
+        /* last bytes */
+        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+        XXH_ASSERT(nbRounds >= 8);
+        acc = XXH3_avalanche(acc);
+#if defined(__clang__)                                /* Clang */ \
+    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
+    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
+        /*
+         * UGLY HACK:
+         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
+         * Everywhere else, it uses scalar code.
+         *
+         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
+         * would still be slower than UMAAL (see XXH_mult64to128).
+         *
+         * Unfortunately, Clang doesn't handle the long multiplies properly and
+         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
+         * scalarized into an ugly mess of VMOV.32 instructions.
+         *
+         * This mess is difficult to avoid without turning autovectorization
+         * off completely, but the issues are usually relatively minor and/or
+         * not worth fixing.
+         *
+         * This loop is the easiest to fix, as unlike XXH32, this pragma
+         * _actually works_ because it is a loop vectorization instead of an
+         * SLP vectorization.
+         */
+        #pragma clang loop vectorize(disable)
+#endif
+        for (i=8 ; i < nbRounds; i++) {
+            /*
+             * Prevents Clang from unrolling the acc loop and interleaving it with this one.
+             */
+            XXH_COMPILER_GUARD(acc);
+            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+        }
+        return XXH3_avalanche(acc + acc_end);
+    }
+}
+
+
+/* =======     Long Keys     ======= */
+
+#define XXH_STRIPE_LEN 64
+#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
+#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
+
+#ifdef XXH_OLD_NAMES
+#  define STRIPE_LEN XXH_STRIPE_LEN
+#  define ACC_NB XXH_ACC_NB
+#endif
+
+#ifndef XXH_PREFETCH_DIST
+#  ifdef __clang__
+#    define XXH_PREFETCH_DIST 320
+#  else
+#    if (XXH_VECTOR == XXH_AVX512)
+#      define XXH_PREFETCH_DIST 512
+#    else
+#      define XXH_PREFETCH_DIST 384
+#    endif
+#  endif  /* __clang__ */
+#endif  /* XXH_PREFETCH_DIST */
+
+/*
+ * These macros are to generate an XXH3_accumulate() function.
+ * The two arguments select the name suffix and target attribute.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
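+ *
+ * For example, XXH3_ACCUMULATE_TEMPLATE(sse2) defines XXH3_accumulate_sse2(),
+ * which prefetches XXH_PREFETCH_DIST bytes ahead and calls
+ * XXH3_accumulate_512_sse2() once per 64-byte stripe.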
+ */ +#define XXH3_ACCUMULATE_TEMPLATE(name) \ +void \ +XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \ + const xxh_u8* XXH_RESTRICT input, \ + const xxh_u8* XXH_RESTRICT secret, \ + size_t nbStripes) \ +{ \ + size_t n; \ + for (n = 0; n < nbStripes; n++ ) { \ + const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \ + XXH_PREFETCH(in + XXH_PREFETCH_DIST); \ + XXH3_accumulate_512_##name( \ + acc, \ + in, \ + secret + n*XXH_SECRET_CONSUME_RATE); \ + } \ +} + + +XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64) +{ + if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64); + XXH_memcpy(dst, &v64, sizeof(v64)); +} + +/* Several intrinsic functions below are supposed to accept __int64 as argument, + * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ . + * However, several environments do not define __int64 type, + * requiring a workaround. + */ +#if !defined (__VMS) \ + && (defined (__cplusplus) \ + || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) + typedef int64_t xxh_i64; +#else + /* the following type must have a width of 64-bit */ + typedef long long xxh_i64; +#endif + + +/* + * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized. + * + * It is a hardened version of UMAC, based off of FARSH's implementation. + * + * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD + * implementations, and it is ridiculously fast. + * + * We harden it by mixing the original input to the accumulators as well as the product. + * + * This means that in the (relatively likely) case of a multiply by zero, the + * original input is preserved. + * + * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve + * cross-pollination, as otherwise the upper and lower halves would be + * essentially independent. + * + * This doesn't matter on 64-bit hashes since they all get merged together in + * the end, so we skip the extra step. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + +#if (XXH_VECTOR == XXH_AVX512) \ + || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0) + +#ifndef XXH_TARGET_AVX512 +# define XXH_TARGET_AVX512 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + __m512i* const xacc = (__m512i *) acc; + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + + { + /* data_vec = input[0]; */ + __m512i const data_vec = _mm512_loadu_si512 (input); + /* key_vec = secret[0]; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + /* data_key = data_vec ^ key_vec; */ + __m512i const data_key = _mm512_xor_si512 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m512i const product = _mm512_mul_epu32 (data_key, data_key_lo); + /* xacc[0] += swap(data_vec); */ + __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2)); + __m512i const sum = _mm512_add_epi64(*xacc, data_swap); + /* xacc[0] += product; */ + *xacc = _mm512_add_epi64(product, sum); + } +} +XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512) + +/* + * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing. 
+ * + * Multiplication isn't perfect, as explained by Google in HighwayHash: + * + * // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to + * // varying degrees. In descending order of goodness, bytes + * // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32. + * // As expected, the upper and lower bytes are much worse. + * + * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291 + * + * Since our algorithm uses a pseudorandom secret to add some variance into the + * mix, we don't need to (or want to) mix as often or as much as HighwayHash does. + * + * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid + * extraction. + * + * Both XXH3_64bits and XXH3_128bits use this subroutine. + */ + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 63) == 0); + XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i)); + { __m512i* const xacc = (__m512i*) acc; + const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1); + + /* xacc[0] ^= (xacc[0] >> 47) */ + __m512i const acc_vec = *xacc; + __m512i const shifted = _mm512_srli_epi64 (acc_vec, 47); + /* xacc[0] ^= secret; */ + __m512i const key_vec = _mm512_loadu_si512 (secret); + __m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */); + + /* xacc[0] *= XXH_PRIME32_1; */ + __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32); + __m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32); + __m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32); + *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32)); + } +} + +XXH_FORCE_INLINE XXH_TARGET_AVX512 void +XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64) +{ + XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0); + XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64); + XXH_ASSERT(((size_t)customSecret & 63) == 0); + (void)(&XXH_writeLE64); + { int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i); + __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64); + __m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos); + + const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret); + __m512i* const dest = ( __m512i*) customSecret; + int i; + XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */ + XXH_ASSERT(((size_t)dest & 63) == 0); + for (i=0; i < nbRounds; ++i) { + dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_AVX2) \ + || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0) + +#ifndef XXH_TARGET_AVX2 +# define XXH_TARGET_AVX2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_AVX2 void +XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 31) == 0); + { __m256i* const xacc = (__m256i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */ + const __m256i* const xinput = (const __m256i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. 
*/
+        const __m256i* const xsecret = (const __m256i *) secret;
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
+            /* data_vec    = xinput[i]; */
+            __m256i const data_vec    = _mm256_loadu_si256    (xinput+i);
+            /* key_vec     = xsecret[i]; */
+            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
+            /* data_key    = data_vec ^ key_vec; */
+            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
+            /* data_key_lo = data_key >> 32; */
+            __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
+            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
+            __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
+            /* xacc[i] += swap(data_vec); */
+            __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
+            __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
+            /* xacc[i] += product; */
+            xacc[i] = _mm256_add_epi64(product, sum);
+    }   }
+}
+XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
+
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void
+XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    XXH_ASSERT((((size_t)acc) & 31) == 0);
+    {   __m256i* const xacc = (__m256i*) acc;
+        /* Unaligned. This is mainly for pointer arithmetic, and because
+         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
+        const __m256i* const xsecret = (const __m256i *) secret;
+        const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
+            /* xacc[i] ^= (xacc[i] >> 47) */
+            __m256i const acc_vec     = xacc[i];
+            __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);
+            __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);
+            /* xacc[i] ^= xsecret; */
+            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
+            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
+
+            /* xacc[i] *= XXH_PRIME32_1; */
+            __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
+            __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
+            __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
+            xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
+    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
+    (void)(&XXH_writeLE64);
+    XXH_PREFETCH(customSecret);
+    {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
+
+        const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
+        __m256i* dest = ( __m256i*) customSecret;
+
+#       if defined(__GNUC__) || defined(__clang__)
+        /*
+         * On GCC & Clang, marking 'dest' as modified causes the compiler to:
+         *   - not extract the secret from SSE registers in the internal loop
+         *   - use fewer common registers, avoiding pushing them onto the stack
+         */
+        XXH_COMPILER_GUARD(dest);
+#       endif
+        XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
+        XXH_ASSERT(((size_t)dest & 31) == 0);
+
+        /* GCC -O2 needs the loop unrolled manually */
+        dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
+        dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
+        dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
+        dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
+        dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
+        dest[5]
= _mm256_add_epi64(_mm256_load_si256(src+5), seed); + } +} + +#endif + +/* x86dispatch always generates SSE2 */ +#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH) + +#ifndef XXH_TARGET_SSE2 +# define XXH_TARGET_SSE2 /* disable attribute target */ +#endif + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + /* SSE2 is just a half-scale version of the AVX2 version. */ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { __m128i* const xacc = (__m128i *) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xinput = (const __m128i *) input; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */ + const __m128i* const xsecret = (const __m128i *) secret; + + size_t i; + for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) { + /* data_vec = xinput[i]; */ + __m128i const data_vec = _mm_loadu_si128 (xinput+i); + /* key_vec = xsecret[i]; */ + __m128i const key_vec = _mm_loadu_si128 (xsecret+i); + /* data_key = data_vec ^ key_vec; */ + __m128i const data_key = _mm_xor_si128 (data_vec, key_vec); + /* data_key_lo = data_key >> 32; */ + __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1)); + /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */ + __m128i const product = _mm_mul_epu32 (data_key, data_key_lo); + /* xacc[i] += swap(data_vec); */ + __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2)); + __m128i const sum = _mm_add_epi64(xacc[i], data_swap); + /* xacc[i] += product; */ + xacc[i] = _mm_add_epi64(product, sum); + } } +} +XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2) + +XXH_FORCE_INLINE XXH_TARGET_SSE2 void +XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + { __m128i* const xacc = (__m128i*) acc; + /* Unaligned. This is mainly for pointer arithmetic, and because + * _mm_loadu_si128 requires a const __m128i * pointer for some reason. 
*/
+        const __m128i* const xsecret = (const __m128i *) secret;
+        const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
+
+        size_t i;
+        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
+            /* xacc[i] ^= (xacc[i] >> 47) */
+            __m128i const acc_vec     = xacc[i];
+            __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);
+            __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);
+            /* xacc[i] ^= xsecret[i]; */
+            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
+            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
+
+            /* xacc[i] *= XXH_PRIME32_1; */
+            __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);
+            __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);
+            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+    (void)(&XXH_writeLE64);
+    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
+
+#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
+        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
+        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
+        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
+#       else
+        __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
+#       endif
+        int i;
+
+        const void* const src16 = XXH3_kSecret;
+        __m128i* dst16 = (__m128i*) customSecret;
+#       if defined(__GNUC__) || defined(__clang__)
+        /*
+         * On GCC & Clang, marking 'dst16' as modified causes the compiler to:
+         *   - not extract the secret from SSE registers in the internal loop
+         *   - use fewer common registers, avoiding pushing them onto the stack
+         */
+        XXH_COMPILER_GUARD(dst16);
+#       endif
+        XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
+        XXH_ASSERT(((size_t)dst16 & 15) == 0);
+
+        for (i=0; i < nbRounds; ++i) {
+            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
+    }   }
+}
+
+#endif
+
+#if (XXH_VECTOR == XXH_NEON)
+
+/* forward declarations for the scalar routines */
+XXH_FORCE_INLINE void
+XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
+                 void const* XXH_RESTRICT secret, size_t lane);
+
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret, size_t lane);
+
+/*!
+ * @internal
+ * @brief The bulk processing loop for NEON and WASM SIMD128.
+ *
+ * The NEON code path is actually partially scalar when running on AArch64. This
+ * is to optimize the pipelining and can have up to 15% speedup depending on the
+ * CPU, and it also mitigates some GCC codegen issues.
+ *
+ * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ *
+ * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
+ * integers, unlike the other platforms, which mask full 64-bit vectors,
+ * so the setup is more complicated than just shifting right.
+ *
+ * Additionally, there is an optimization for 4 lanes at once noted below.
+ *
+ * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
+ * there needs to be *three* versions of the accumulate operation used
+ * for the remaining 2 lanes.
+ *
+ * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
+ * nearly perfectly.
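+ *
+ * For example, with the default XXH3_NEON_LANES == 6 on most AArch64 cores,
+ * the 4-lane loop below runs once (lanes 0-3), the 2-lane loop once
+ * (lanes 4-5), and lanes 6 and 7 go through XXH3_scalarRound().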
+ */ + +XXH_FORCE_INLINE void +XXH3_accumulate_512_neon( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0); + { /* GCC for darwin arm64 does not like aliasing here */ + xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc; + /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */ + uint8_t const* xinput = (const uint8_t *) input; + uint8_t const* xsecret = (const uint8_t *) secret; + + size_t i; +#ifdef __wasm_simd128__ + /* + * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret + * is constant propagated, which results in it converting it to this + * inside the loop: + * + * a = v128.load(XXH3_kSecret + 0 + $secret_offset, offset = 0) + * b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0) + * ... + * + * This requires a full 32-bit address immediate (and therefore a 6 byte + * instruction) as well as an add for each offset. + * + * Putting an asm guard prevents it from folding (at the cost of losing + * the alignment hint), and uses the free offset in `v128.load` instead + * of adding secret_offset each time which overall reduces code size by + * about a kilobyte and improves performance. + */ + XXH_COMPILER_GUARD(xsecret); +#endif + /* Scalar lanes use the normal scalarRound routine */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { + XXH3_scalarRound(acc, input, secret, i); + } + i = 0; + /* 4 NEON lanes at a time. */ + for (; i+1 < XXH3_NEON_LANES / 2; i+=2) { + /* data_vec = xinput[i]; */ + uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput + (i * 16)); + uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput + ((i+1) * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec_1 = XXH_vld1q_u64(xsecret + (i * 16)); + uint64x2_t key_vec_2 = XXH_vld1q_u64(xsecret + ((i+1) * 16)); + /* data_swap = swap(data_vec) */ + uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1); + uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1); + uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2); + + /* + * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a + * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to + * get one vector with the low 32 bits of each lane, and one vector + * with the high 32 bits of each lane. + * + * The intrinsic returns a double vector because the original ARMv7-a + * instruction modified both arguments in place. AArch64 and SIMD128 emit + * two instructions from this intrinsic. + * + * [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ] + * [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ] + */ + uint32x4x2_t unzipped = vuzpq_u32( + vreinterpretq_u32_u64(data_key_1), + vreinterpretq_u32_u64(data_key_2) + ); + /* data_key_lo = data_key & 0xFFFFFFFF */ + uint32x4_t data_key_lo = unzipped.val[0]; + /* data_key_hi = data_key >> 32 */ + uint32x4_t data_key_hi = unzipped.val[1]; + /* + * Then, we can split the vectors horizontally and multiply which, as for most + * widening intrinsics, have a variant that works on both high half vectors + * for free on AArch64. A similar instruction is available on SIMD128. 
+ * + * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi + */ + uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi); + uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi); + /* + * Clang reorders + * a += b * c; // umlal swap.2d, dkl.2s, dkh.2s + * c += a; // add acc.2d, acc.2d, swap.2d + * to + * c += a; // add acc.2d, acc.2d, swap.2d + * c += b * c; // umlal acc.2d, dkl.2s, dkh.2s + * + * While it would make sense in theory since the addition is faster, + * for reasons likely related to umlal being limited to certain NEON + * pipelines, this is worse. A compiler guard fixes this. + */ + XXH_COMPILER_GUARD_CLANG_NEON(sum_1); + XXH_COMPILER_GUARD_CLANG_NEON(sum_2); + /* xacc[i] = acc_vec + sum; */ + xacc[i] = vaddq_u64(xacc[i], sum_1); + xacc[i+1] = vaddq_u64(xacc[i+1], sum_2); + } + /* Operate on the remaining NEON lanes 2 at a time. */ + for (; i < XXH3_NEON_LANES / 2; i++) { + /* data_vec = xinput[i]; */ + uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16)); + /* key_vec = xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); + /* acc_vec_2 = swap(data_vec) */ + uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1); + /* data_key = data_vec ^ key_vec; */ + uint64x2_t data_key = veorq_u64(data_vec, key_vec); + /* For two lanes, just use VMOVN and VSHRN. */ + /* data_key_lo = data_key & 0xFFFFFFFF; */ + uint32x2_t data_key_lo = vmovn_u64(data_key); + /* data_key_hi = data_key >> 32; */ + uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32); + /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */ + uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi); + /* Same Clang workaround as before */ + XXH_COMPILER_GUARD_CLANG_NEON(sum); + /* xacc[i] = acc_vec + sum; */ + xacc[i] = vaddq_u64 (xacc[i], sum); + } + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon) + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { xxh_aliasing_uint64x2_t* xacc = (xxh_aliasing_uint64x2_t*) acc; + uint8_t const* xsecret = (uint8_t const*) secret; + + size_t i; + /* WASM uses operator overloads and doesn't need these. 
*/ +#ifndef __wasm_simd128__ + /* { prime32_1, prime32_1 } */ + uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1); + /* { 0, prime32_1, 0, prime32_1 } */ + uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32)); +#endif + + /* AArch64 uses both scalar and neon at the same time */ + for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) { + XXH3_scalarScrambleRound(acc, secret, i); + } + for (i=0; i < XXH3_NEON_LANES / 2; i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + uint64x2_t acc_vec = xacc[i]; + uint64x2_t shifted = vshrq_n_u64(acc_vec, 47); + uint64x2_t data_vec = veorq_u64(acc_vec, shifted); + + /* xacc[i] ^= xsecret[i]; */ + uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16)); + uint64x2_t data_key = veorq_u64(data_vec, key_vec); + /* xacc[i] *= XXH_PRIME32_1 */ +#ifdef __wasm_simd128__ + /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */ + xacc[i] = data_key * XXH_PRIME32_1; +#else + /* + * Expanded version with portable NEON intrinsics + * + * lo(x) * lo(y) + (hi(x) * lo(y) << 32) + * + * prod_hi = hi(data_key) * lo(prime) << 32 + * + * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector + * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits + * and avoid the shift. + */ + uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi); + /* Extract low bits for vmlal_u32 */ + uint32x2_t data_key_lo = vmovn_u64(data_key); + /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */ + xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo); +#endif + } + } +} +#endif + +#if (XXH_VECTOR == XXH_VSX) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + /* presumed aligned */ + xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; + xxh_u8 const* const xinput = (xxh_u8 const*) input; /* no alignment restriction */ + xxh_u8 const* const xsecret = (xxh_u8 const*) secret; /* no alignment restriction */ + xxh_u64x2 const v32 = { 32, 32 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) { + /* data_vec = xinput[i]; */ + xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i); + /* key_vec = xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + /* shuffled = (data_key << 32) | (data_key >> 32); */ + xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32); + /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */ + xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled); + /* acc_vec = xacc[i]; */ + xxh_u64x2 acc_vec = xacc[i]; + acc_vec += product; + + /* swap high and low halves */ +#ifdef __s390x__ + acc_vec += vec_permi(data_vec, data_vec, 2); +#else + acc_vec += vec_xxpermdi(data_vec, data_vec, 2); +#endif + xacc[i] = acc_vec; + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx) + +XXH_FORCE_INLINE void +XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) +{ + XXH_ASSERT((((size_t)acc) & 15) == 0); + + { xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc; + const xxh_u8* const xsecret = (const xxh_u8*) secret; + /* constants */ + xxh_u64x2 const v32 = { 32, 32 }; + xxh_u64x2 const v47 = { 47, 47 }; + xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 }; + size_t i; + for (i = 0; i < XXH_STRIPE_LEN / 
sizeof(xxh_u64x2); i++) { + /* xacc[i] ^= (xacc[i] >> 47); */ + xxh_u64x2 const acc_vec = xacc[i]; + xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47); + + /* xacc[i] ^= xsecret[i]; */ + xxh_u64x2 const key_vec = XXH_vec_loadu(xsecret + 16*i); + xxh_u64x2 const data_key = data_vec ^ key_vec; + + /* xacc[i] *= XXH_PRIME32_1 */ + /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF); */ + xxh_u64x2 const prod_even = XXH_vec_mule((xxh_u32x4)data_key, prime); + /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32); */ + xxh_u64x2 const prod_odd = XXH_vec_mulo((xxh_u32x4)data_key, prime); + xacc[i] = prod_odd + (prod_even << v32); + } } +} + +#endif + +#if (XXH_VECTOR == XXH_SVE) + +XXH_FORCE_INLINE void +XXH3_accumulate_512_sve( void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + uint64_t *xacc = (uint64_t *)acc; + const uint64_t *xinput = (const uint64_t *)(const void *)input; + const uint64_t *xsecret = (const uint64_t *)(const void *)secret; + svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); + uint64_t element_count = svcntd(); + if (element_count >= 8) { + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc); + ACCRND(vacc, 0); + svst1_u64(mask, xacc, vacc); + } else if (element_count == 2) { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + ACCRND(acc0, 0); + ACCRND(acc1, 2); + ACCRND(acc2, 4); + ACCRND(acc3, 6); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 6, acc3); + } else { + svbool_t mask = svptrue_pat_b64(SV_VL4); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 4); + ACCRND(acc0, 0); + ACCRND(acc1, 4); + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); + } +} + +XXH_FORCE_INLINE void +XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, + const xxh_u8* XXH_RESTRICT secret, + size_t nbStripes) +{ + if (nbStripes != 0) { + uint64_t *xacc = (uint64_t *)acc; + const uint64_t *xinput = (const uint64_t *)(const void *)input; + const uint64_t *xsecret = (const uint64_t *)(const void *)secret; + svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1); + uint64_t element_count = svcntd(); + if (element_count >= 8) { + svbool_t mask = svptrue_pat_b64(SV_VL8); + svuint64_t vacc = svld1_u64(mask, xacc + 0); + do { + /* svprfd(svbool_t, void *, enum svfprop); */ + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(vacc, 0); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, vacc); + } else if (element_count == 2) { /* sve128 */ + svbool_t mask = svptrue_pat_b64(SV_VL2); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 2); + svuint64_t acc2 = svld1_u64(mask, xacc + 4); + svuint64_t acc3 = svld1_u64(mask, xacc + 6); + do { + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(acc0, 0); + ACCRND(acc1, 2); + ACCRND(acc2, 4); + ACCRND(acc3, 6); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 2, acc1); + svst1_u64(mask, xacc + 4, acc2); + svst1_u64(mask, xacc + 6, acc3); + } else { + svbool_t mask = 
svptrue_pat_b64(SV_VL4); + svuint64_t acc0 = svld1_u64(mask, xacc + 0); + svuint64_t acc1 = svld1_u64(mask, xacc + 4); + do { + svprfd(mask, xinput + 128, SV_PLDL1STRM); + ACCRND(acc0, 0); + ACCRND(acc1, 4); + xinput += 8; + xsecret += 1; + nbStripes--; + } while (nbStripes != 0); + + svst1_u64(mask, xacc + 0, acc0); + svst1_u64(mask, xacc + 4, acc1); + } + } +} + +#endif + +/* scalar variants - universal */ + +#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__)) +/* + * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they + * emit an excess mask and a full 64-bit multiply-add (MADD X-form). + * + * While this might not seem like much, as AArch64 is a 64-bit architecture, only + * big Cortex designs have a full 64-bit multiplier. + * + * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit + * multiplies expand to 2-3 multiplies in microcode. This has a major penalty + * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline. + * + * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does + * not have this penalty and does the mask automatically. + */ +XXH_FORCE_INLINE xxh_u64 +XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) +{ + xxh_u64 ret; + /* note: %x = 64-bit register, %w = 32-bit register */ + __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc)); + return ret; +} +#else +XXH_FORCE_INLINE xxh_u64 +XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc) +{ + return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc; +} +#endif + +/*! + * @internal + * @brief Scalar round for @ref XXH3_accumulate_512_scalar(). + * + * This is extracted to its own function because the NEON path uses a combination + * of NEON and scalar. + */ +XXH_FORCE_INLINE void +XXH3_scalarRound(void* XXH_RESTRICT acc, + void const* XXH_RESTRICT input, + void const* XXH_RESTRICT secret, + size_t lane) +{ + xxh_u64* xacc = (xxh_u64*) acc; + xxh_u8 const* xinput = (xxh_u8 const*) input; + xxh_u8 const* xsecret = (xxh_u8 const*) secret; + XXH_ASSERT(lane < XXH_ACC_NB); + XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0); + { + xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8); + xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8); + xacc[lane ^ 1] += data_val; /* swap adjacent lanes */ + xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]); + } +} + +/*! + * @internal + * @brief Processes a 64 byte block of data using the scalar path. + */ +XXH_FORCE_INLINE void +XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret) +{ + size_t i; + /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__arm__) || defined(__thumb2__)) \ + && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \ + && XXH_SIZE_OPT <= 0 +# pragma GCC unroll 8 +#endif + for (i=0; i < XXH_ACC_NB; i++) { + XXH3_scalarRound(acc, input, secret, i); + } +} +XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar) + +/*! + * @internal + * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar(). + * + * This is extracted to its own function because the NEON path uses a combination + * of NEON and scalar. 
+ */
+XXH_FORCE_INLINE void
+XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+                         void const* XXH_RESTRICT secret,
+                         size_t lane)
+{
+    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
+    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
+    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
+    XXH_ASSERT(lane < XXH_ACC_NB);
+    {
+        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
+        xxh_u64 acc64 = xacc[lane];
+        acc64 = XXH_xorshift64(acc64, 47);
+        acc64 ^= key64;
+        acc64 *= XXH_PRIME32_1;
+        xacc[lane] = acc64;
+    }
+}
+
+/*!
+ * @internal
+ * @brief Scrambles the accumulators after a large chunk has been read
+ */
+XXH_FORCE_INLINE void
+XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+{
+    size_t i;
+    for (i=0; i < XXH_ACC_NB; i++) {
+        XXH3_scalarScrambleRound(acc, secret, i);
+    }
+}
+
+XXH_FORCE_INLINE void
+XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
+{
+    /*
+     * We need a separate pointer for the hack below,
+     * which requires a non-const pointer.
+     * Any decent compiler will optimize this out otherwise.
+     */
+    const xxh_u8* kSecretPtr = XXH3_kSecret;
+    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
+
+#if defined(__GNUC__) && defined(__aarch64__)
+    /*
+     * UGLY HACK:
+     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
+     * placed sequentially, in order, at the top of the unrolled loop.
+     *
+     * While MOVK is great for generating constants (2 cycles for a 64-bit
+     * constant compared to 4 cycles for LDR), it fights for bandwidth with
+     * the arithmetic instructions.
+     *
+     *   I   L   S
+     * MOVK
+     * MOVK
+     * MOVK
+     * MOVK
+     * ADD
+     * SUB      STR
+     *          STR
+     *
+     * By forcing loads from memory (as the asm line causes the compiler to assume
+     * that XXH3_kSecretPtr has been changed), the pipelines are used more
+     * efficiently:
+     *   I   L   S
+     *      LDR
+     *  ADD LDR
+     *  SUB     STR
+     *          STR
+     *
+     * See XXH3_NEON_LANES for details on the pipeline.
+     *
+     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
+     *   without hack: 2654.4 MB/s
+     *   with hack:    3202.9 MB/s
+     */
+    XXH_COMPILER_GUARD(kSecretPtr);
+#endif
+    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
+        int i;
+        for (i=0; i < nbRounds; i++) {
+            /*
+             * The asm hack causes the compiler to assume that kSecretPtr aliases with
+             * customSecret, and on aarch64, this prevented LDP from merging two
+             * loads together for free. Putting the loads together before the stores
+             * properly generates LDP.
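+             *
+             * The loop body below is also the entire seed-customization scheme:
+             * each 16-byte segment of the default secret is combined with the
+             * seed as
+             *
+             *     lo = readLE64(kSecret + 16*i)     + seed
+             *     hi = readLE64(kSecret + 16*i + 8) - seed
+             *
+             * so a seed of 0 reproduces XXH3_kSecret exactly, which is why
+             * seed==0 can be routed to the default-secret path elsewhere.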
+ */ + xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i) + seed64; + xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64; + XXH_writeLE64((xxh_u8*)customSecret + 16*i, lo); + XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi); + } } +} + + +typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t); +typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*); +typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64); + + +#if (XXH_VECTOR == XXH_AVX512) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx512 +#define XXH3_accumulate XXH3_accumulate_avx512 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512 + +#elif (XXH_VECTOR == XXH_AVX2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_avx2 +#define XXH3_accumulate XXH3_accumulate_avx2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2 + +#elif (XXH_VECTOR == XXH_SSE2) + +#define XXH3_accumulate_512 XXH3_accumulate_512_sse2 +#define XXH3_accumulate XXH3_accumulate_sse2 +#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2 +#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2 + +#elif (XXH_VECTOR == XXH_NEON) + +#define XXH3_accumulate_512 XXH3_accumulate_512_neon +#define XXH3_accumulate XXH3_accumulate_neon +#define XXH3_scrambleAcc XXH3_scrambleAcc_neon +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#elif (XXH_VECTOR == XXH_VSX) + +#define XXH3_accumulate_512 XXH3_accumulate_512_vsx +#define XXH3_accumulate XXH3_accumulate_vsx +#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#elif (XXH_VECTOR == XXH_SVE) +#define XXH3_accumulate_512 XXH3_accumulate_512_sve +#define XXH3_accumulate XXH3_accumulate_sve +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#else /* scalar */ + +#define XXH3_accumulate_512 XXH3_accumulate_512_scalar +#define XXH3_accumulate XXH3_accumulate_scalar +#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar +#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar + +#endif + +#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */ +# undef XXH3_initCustomSecret +# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar +#endif + +XXH_FORCE_INLINE void +XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc, + const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble) +{ + size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE; + size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock; + size_t const nb_blocks = (len - 1) / block_len; + + size_t n; + + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + + for (n = 0; n < nb_blocks; n++) { + f_acc(acc, input + n*block_len, secret, nbStripesPerBlock); + f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN); + } + + /* last partial block */ + XXH_ASSERT(len > XXH_STRIPE_LEN); + { size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN; + XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE)); + f_acc(acc, input + nb_blocks*block_len, secret, nbStripes); + + /* last stripe */ + { const xxh_u8* const p = input + len - XXH_STRIPE_LEN; +#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */ + XXH3_accumulate_512(acc, p, 
secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+    }   }
+}
+
+XXH_FORCE_INLINE xxh_u64
+XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
+{
+    return XXH3_mul128_fold64(
+               acc[0] ^ XXH_readLE64(secret),
+               acc[1] ^ XXH_readLE64(secret+8) );
+}
+
+static XXH64_hash_t
+XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
+{
+    xxh_u64 result64 = start;
+    size_t i = 0;
+
+    for (i = 0; i < 4; i++) {
+        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
+#if defined(__clang__)                                /* Clang */ \
+    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
+    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
+    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
+        /*
+         * UGLY HACK:
+         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
+         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
+         * XXH3_64bits, len == 256, Snapdragon 835:
+         *   without hack: 2063.7 MB/s
+         *   with hack:    2560.7 MB/s
+         */
+        XXH_COMPILER_GUARD(result64);
+#endif
+    }
+
+    return XXH3_avalanche(result64);
+}
+
+#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
+                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
+                           const void* XXH_RESTRICT secret, size_t secretSize,
+                           XXH3_f_accumulate f_acc,
+                           XXH3_f_scrambleAcc f_scramble)
+{
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
+
+    /* converge into final hash */
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    /* do not align on 8, so that the secret is different from the accumulator */
+#define XXH_SECRET_MERGEACCS_START 11
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    return XXH3_mergeAccs(acc, (const xxh_u8*)secret + XXH_SECRET_MERGEACCS_START, (xxh_u64)len * XXH_PRIME64_1);
+}
+
+/*
+ * It's important for performance to transmit the secret's size (when it's static)
+ * so that the compiler can properly optimize the vectorized loop.
+ * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
+ */
+XXH3_WITH_SECRET_INLINE XXH64_hash_t
+XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
+                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
+{
+    (void)seed64;
+    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
+}
+
+/*
+ * It's preferable for performance that XXH3_hashLong is not inlined,
+ * as it results in a smaller function for small data, easier on the instruction cache.
+ * Note that inside this no_inline function, we do inline the internal loop,
+ * and provide a statically defined secret size to allow optimization of the vector loop.
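+ *
+ * As a rough illustration of the sizes involved (assuming the default
+ * parameters: XXH_SECRET_DEFAULT_SIZE == 192, XXH_STRIPE_LEN == 64,
+ * XXH_SECRET_CONSUME_RATE == 8):
+ *
+ *     nbStripesPerBlock = (192 - 64) / 8 = 16 stripes
+ *     block_len         = 64 * 16       = 1024 bytes
+ *
+ * i.e. with the default secret, the accumulators are scrambled once per
+ * KB of input consumed by the internal loop.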
+ */ +XXH_NO_INLINE XXH_PUREF XXH64_hash_t +XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc); +} + +/* + * XXH3_hashLong_64b_withSeed(): + * Generate a custom key based on alteration of default XXH3_kSecret with the seed, + * and then use this key for long mode hashing. + * + * This operation is decently fast but nonetheless costs a little bit of time. + * Try to avoid it whenever possible (typically when seed==0). + * + * It's important for performance that XXH3_hashLong is not inlined. Not sure + * why (uop cache maybe?), but the difference is large and easily measurable. + */ +XXH_FORCE_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len, + XXH64_hash_t seed, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ +#if XXH_SIZE_OPT <= 0 + if (seed == 0) + return XXH3_hashLong_64b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc, f_scramble); +#endif + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed); + return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret), + f_acc, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH64_hash_t +XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_64b_withSeed_internal(input, len, seed, + XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + + +typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH64_hash_t +XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong64_f f_hashLong) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secretLen` condition is not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + * Also, note that function signature doesn't offer room to return an error. + */ + if (len <= 16) + return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen); +} + + +/* === Public entry point === */ + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length) +{ + return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t +XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize) +{ + return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret); +} + +/*! 
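+ * A minimal usage sketch (names below are illustrative, not part of the API):
+ * @code{.c}
+ * #include "xxhash.h"
+ *
+ * XXH64_hash_t hash_with_seed(const void* data, size_t size)
+ * {
+ *     // seed==0 is routed to the faster default-secret path (see above)
+ *     return XXH3_64bits_withSeed(data, size, 1234ULL);
+ * }
+ * @endcode
+ *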
+ * @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
+{
+    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+}
+
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (length <= XXH3_MIDSIZE_MAX)
+        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
+}
+
+
+/* === XXH3 streaming === */
+#ifndef XXH_NO_STREAM
+/*
+ * Mallocs a pointer that is always aligned to align.
+ *
+ * This must be freed with `XXH_alignedFree()`.
+ *
+ * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
+ * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2,
+ * or, on 32-bit, for the 16 byte aligned loads in SSE2 and NEON.
+ *
+ * This underalignment previously caused a rather obvious crash which went
+ * completely unnoticed due to XXH3_createState() not actually being tested.
+ * Credit to RedSpah for noticing this bug.
+ *
+ * The alignment is done manually: Functions like posix_memalign or _mm_malloc
+ * are avoided: To maintain portability, we would have to write a fallback
+ * like this anyways, and besides, testing for the existence of library
+ * functions without relying on external build tools is impossible.
+ *
+ * The method is simple: Overallocate, manually align, and store the offset
+ * to the original behind the returned pointer.
+ *
+ * Align must be a power of 2 and 8 <= align <= 128.
+ */
+static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
+{
+    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
+    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
+    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
+    {   /* Overallocate to make room for manual realignment and an offset byte */
+        xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
+        if (base != NULL) {
+            /*
+             * Get the offset needed to align this pointer.
+             *
+             * Even if the returned pointer is aligned, there will always be
+             * at least one byte to store the offset to the original pointer.
+             */
+            size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
+            /* Add the offset for the now-aligned pointer */
+            xxh_u8* ptr = base + offset;
+
+            XXH_ASSERT((size_t)ptr % align == 0);
+
+            /* Store the offset immediately before the returned pointer. */
+            ptr[-1] = (xxh_u8)offset;
+            return ptr;
+        }
+        return NULL;
+    }
+}
+/*
+ * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
+ * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
+ */
+static void XXH_alignedFree(void* p)
+{
+    if (p != NULL) {
+        xxh_u8* ptr = (xxh_u8*)p;
+        /* Get the offset byte we added in XXH_alignedMalloc(). */
+        xxh_u8 offset = ptr[-1];
+        /* Free the original malloc'd pointer */
+        xxh_u8* base = ptr - offset;
+        XXH_free(base);
+    }
+}
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Allocate an @ref XXH3_state_t.
+ *
+ * @return An allocated pointer of @ref XXH3_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH3_freeState().
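+ *
+ * Illustrative streaming round-trip (a sketch; error handling abbreviated,
+ * and `chunk1`/`chunk2` stand for arbitrary caller buffers):
+ * @code{.c}
+ * XXH3_state_t* const state = XXH3_createState();
+ * if (state != NULL) {
+ *     if (XXH3_64bits_reset(state) == XXH_OK) {
+ *         XXH3_64bits_update(state, chunk1, chunk1Size);
+ *         XXH3_64bits_update(state, chunk2, chunk2Size);
+ *         {   XXH64_hash_t const hash = XXH3_64bits_digest(state);
+ *             (void)hash;   // same value as hashing both chunks at once
+ *         }
+ *     }
+ *     XXH3_freeState(state);
+ * }
+ * @endcode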
+ */ +XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void) +{ + XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64); + if (state==NULL) return NULL; + XXH3_INITSTATE(state); + return state; +} + +/*! @ingroup XXH3_family */ +/*! + * @brief Frees an @ref XXH3_state_t. + * + * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState(). + * + * @return @ref XXH_OK. + * + * @note Must be allocated with XXH3_createState(). + */ +XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr) +{ + XXH_alignedFree(statePtr); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API void +XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state) +{ + XXH_memcpy(dst_state, src_state, sizeof(*dst_state)); +} + +static void +XXH3_reset_internal(XXH3_state_t* statePtr, + XXH64_hash_t seed, + const void* secret, size_t secretSize) +{ + size_t const initStart = offsetof(XXH3_state_t, bufferedSize); + size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart; + XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart); + XXH_ASSERT(statePtr != NULL); + /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */ + memset((char*)statePtr + initStart, 0, initLength); + statePtr->acc[0] = XXH_PRIME32_3; + statePtr->acc[1] = XXH_PRIME64_1; + statePtr->acc[2] = XXH_PRIME64_2; + statePtr->acc[3] = XXH_PRIME64_3; + statePtr->acc[4] = XXH_PRIME64_4; + statePtr->acc[5] = XXH_PRIME32_2; + statePtr->acc[6] = XXH_PRIME64_5; + statePtr->acc[7] = XXH_PRIME32_1; + statePtr->seed = seed; + statePtr->useSeed = (seed != 0); + statePtr->extSecret = (const unsigned char*)secret; + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); + statePtr->secretLimit = secretSize - XXH_STRIPE_LEN; + statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize) +{ + if (statePtr == NULL) return XXH_ERROR; + XXH3_reset_internal(statePtr, 0, secret, secretSize); + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed) +{ + if (statePtr == NULL) return XXH_ERROR; + if (seed==0) return XXH3_64bits_reset(statePtr); + if ((seed != statePtr->seed) || (statePtr->extSecret != NULL)) + XXH3_initCustomSecret(statePtr->customSecret, seed); + XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE); + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64) +{ + if (statePtr == NULL) return XXH_ERROR; + if (secret == NULL) return XXH_ERROR; + if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR; + XXH3_reset_internal(statePtr, seed64, secret, secretSize); + statePtr->useSeed = 1; /* always, even if seed64==0 */ + return XXH_OK; +} + +/*! 
+ * @internal
+ * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
+ *
+ * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
+ *
+ * @param acc Pointer to the 8 accumulator lanes
+ * @param nbStripesSoFarPtr In/out pointer to the number of leftover stripes in the block
+ * @param nbStripesPerBlock Number of stripes in a block
+ * @param input Input pointer
+ * @param nbStripes Number of stripes to process
+ * @param secret Secret pointer
+ * @param secretLimit Offset of the last block in @p secret
+ * @param f_acc Pointer to an XXH3_accumulate implementation
+ * @param f_scramble Pointer to an XXH3_scrambleAcc implementation
+ * @return Pointer past the end of @p input after processing
+ */
+XXH_FORCE_INLINE const xxh_u8 *
+XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
+                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
+                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
+                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
+                    XXH3_f_accumulate f_acc,
+                    XXH3_f_scrambleAcc f_scramble)
+{
+    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
+    /* Process full blocks */
+    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
+        /* Process the initial partial block... */
+        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
+
+        do {
+            /* Accumulate and scramble */
+            f_acc(acc, input, initialSecret, nbStripesThisIter);
+            f_scramble(acc, secret + secretLimit);
+            input += nbStripesThisIter * XXH_STRIPE_LEN;
+            nbStripes -= nbStripesThisIter;
+            /* Then continue the loop with the full block size */
+            nbStripesThisIter = nbStripesPerBlock;
+            initialSecret = secret;
+        } while (nbStripes >= nbStripesPerBlock);
+        *nbStripesSoFarPtr = 0;
+    }
+    /* Process a partial block */
+    if (nbStripes > 0) {
+        f_acc(acc, input, initialSecret, nbStripes);
+        input += nbStripes * XXH_STRIPE_LEN;
+        *nbStripesSoFarPtr += nbStripes;
+    }
+    /* Return end pointer */
+    return input;
+}
+
+#ifndef XXH3_STREAM_USE_STACK
+# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
+#   define XXH3_STREAM_USE_STACK 1
+# endif
+#endif
+/*
+ * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
+ */
+XXH_FORCE_INLINE XXH_errorcode
+XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
+            const xxh_u8* XXH_RESTRICT input, size_t len,
+            XXH3_f_accumulate f_acc,
+            XXH3_f_scrambleAcc f_scramble)
+{
+    if (input==NULL) {
+        XXH_ASSERT(len == 0);
+        return XXH_OK;
+    }
+
+    XXH_ASSERT(state != NULL);
+    {   const xxh_u8* const bEnd = input + len;
+        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
+#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+        /* For some reason, gcc and MSVC seem to suffer greatly
+         * when operating accumulators directly into state.
+         * Operating into stack space seems to enable proper optimization.
+ * clang, on the other hand, doesn't seem to need this trick */ + XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; + XXH_memcpy(acc, state->acc, sizeof(acc)); +#else + xxh_u64* XXH_RESTRICT const acc = state->acc; +#endif + state->totalLen += len; + XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE); + + /* small input : just fill in tmp buffer */ + if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) { + XXH_memcpy(state->buffer + state->bufferedSize, input, len); + state->bufferedSize += (XXH32_hash_t)len; + return XXH_OK; + } + + /* total input is now > XXH3_INTERNALBUFFER_SIZE */ + #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN) + XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */ + + /* + * Internal buffer is partially filled (always, except at beginning) + * Complete it, then consume it. + */ + if (state->bufferedSize) { + size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize; + XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize); + input += loadSize; + XXH3_consumeStripes(acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, XXH3_INTERNALBUFFER_STRIPES, + secret, state->secretLimit, + f_acc, f_scramble); + state->bufferedSize = 0; + } + XXH_ASSERT(input < bEnd); + if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) { + size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN; + input = XXH3_consumeStripes(acc, + &state->nbStripesSoFar, state->nbStripesPerBlock, + input, nbStripes, + secret, state->secretLimit, + f_acc, f_scramble); + XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN); + + } + /* Some remaining input (always) : buffer it */ + XXH_ASSERT(input < bEnd); + XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE); + XXH_ASSERT(state->bufferedSize == 0); + XXH_memcpy(state->buffer, input, (size_t)(bEnd-input)); + state->bufferedSize = (XXH32_hash_t)(bEnd-input); +#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1 + /* save stack accumulators into state */ + XXH_memcpy(state->acc, acc, sizeof(acc)); +#endif + } + + return XXH_OK; +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH_errorcode +XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len) +{ + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_accumulate, XXH3_scrambleAcc); +} + + +XXH_FORCE_INLINE void +XXH3_digest_long (XXH64_hash_t* acc, + const XXH3_state_t* state, + const unsigned char* secret) +{ + xxh_u8 lastStripe[XXH_STRIPE_LEN]; + const xxh_u8* lastStripePtr; + + /* + * Digest on a local copy. This way, the state remains unaltered, and it can + * continue ingesting more input afterwards. 
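+ *
+ * For example (a sketch; p1/p2 are arbitrary caller buffers), a caller
+ * may interleave digests with further updates:
+ *
+ *     XXH3_64bits_update(state, p1, n1);
+ *     h1 = XXH3_64bits_digest(state);   // hash of p1
+ *     XXH3_64bits_update(state, p2, n2);
+ *     h2 = XXH3_64bits_digest(state);   // hash of p1 followed by p2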
+ */ + XXH_memcpy(acc, state->acc, sizeof(state->acc)); + if (state->bufferedSize >= XXH_STRIPE_LEN) { + /* Consume remaining stripes then point to remaining data in buffer */ + size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN; + size_t nbStripesSoFar = state->nbStripesSoFar; + XXH3_consumeStripes(acc, + &nbStripesSoFar, state->nbStripesPerBlock, + state->buffer, nbStripes, + secret, state->secretLimit, + XXH3_accumulate, XXH3_scrambleAcc); + lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN; + } else { /* bufferedSize < XXH_STRIPE_LEN */ + /* Copy to temp buffer */ + size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize; + XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */ + XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize); + XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize); + lastStripePtr = lastStripe; + } + /* Last stripe */ + XXH3_accumulate_512(acc, + lastStripePtr, + secret + state->secretLimit - XXH_SECRET_LASTACC_START); +} + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state) +{ + const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret; + if (state->totalLen > XXH3_MIDSIZE_MAX) { + XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB]; + XXH3_digest_long(acc, state, secret); + return XXH3_mergeAccs(acc, + secret + XXH_SECRET_MERGEACCS_START, + (xxh_u64)state->totalLen * XXH_PRIME64_1); + } + /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */ + if (state->useSeed) + return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed); + return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen), + secret, state->secretLimit + XXH_STRIPE_LEN); +} +#endif /* !XXH_NO_STREAM */ + + +/* ========================================== + * XXH3 128 bits (a.k.a XXH128) + * ========================================== + * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant, + * even without counting the significantly larger output size. + * + * For example, extra steps are taken to avoid the seed-dependent collisions + * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B). + * + * This strength naturally comes at the cost of some speed, especially on short + * lengths. Note that longer hashes are about as fast as the 64-bit version + * due to it using only a slight modification of the 64-bit loop. + * + * XXH128 is also more oriented towards 64-bit machines. It is still extremely + * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64). + */ + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + /* A doubled version of 1to3_64b with different constants. 
*/ + XXH_ASSERT(input != NULL); + XXH_ASSERT(1 <= len && len <= 3); + XXH_ASSERT(secret != NULL); + /* + * len = 1: combinedl = { input[0], 0x01, input[0], input[0] } + * len = 2: combinedl = { input[1], 0x02, input[0], input[1] } + * len = 3: combinedl = { input[2], 0x03, input[0], input[1] } + */ + { xxh_u8 const c1 = input[0]; + xxh_u8 const c2 = input[len >> 1]; + xxh_u8 const c3 = input[len - 1]; + xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24) + | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8); + xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13); + xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed; + xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed; + xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl; + xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph; + XXH128_hash_t h128; + h128.low64 = XXH64_avalanche(keyed_lo); + h128.high64 = XXH64_avalanche(keyed_hi); + return h128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(4 <= len && len <= 8); + seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32; + { xxh_u32 const input_lo = XXH_readLE32(input); + xxh_u32 const input_hi = XXH_readLE32(input + len - 4); + xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32); + xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed; + xxh_u64 const keyed = input_64 ^ bitflip; + + /* Shift len to the left to ensure it is even, this avoids even multiplies. */ + XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2)); + + m128.high64 += (m128.low64 << 1); + m128.low64 ^= (m128.high64 >> 3); + + m128.low64 = XXH_xorshift64(m128.low64, 35); + m128.low64 *= PRIME_MX2; + m128.low64 = XXH_xorshift64(m128.low64, 28); + m128.high64 = XXH3_avalanche(m128.high64); + return m128; + } +} + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(input != NULL); + XXH_ASSERT(secret != NULL); + XXH_ASSERT(9 <= len && len <= 16); + { xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed; + xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed; + xxh_u64 const input_lo = XXH_readLE64(input); + xxh_u64 input_hi = XXH_readLE64(input + len - 8); + XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1); + /* + * Put len in the middle of m128 to ensure that the length gets mixed to + * both the low and high bits in the 128x64 multiply below. + */ + m128.low64 += (xxh_u64)(len - 1) << 54; + input_hi ^= bitfliph; + /* + * Add the high 32 bits of input_hi to the high 32 bits of m128, then + * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to + * the high 64 bits of m128. + * + * The best approach to this operation is different on 32-bit and 64-bit. + */ + if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */ + /* + * 32-bit optimized version, which is more readable. + * + * On 32-bit, it removes an ADC and delays a dependency between the two + * halves of m128.high64, but it generates an extra mask on 64-bit. + */ + m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2); + } else { + /* + * 64-bit optimized (albeit more confusing) version. 
+ * + * Uses some properties of addition and multiplication to remove the mask: + * + * Let: + * a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF) + * b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000) + * c = XXH_PRIME32_2 + * + * a + (b * c) + * Inverse Property: x + y - x == y + * a + (b * (1 + c - 1)) + * Distributive Property: x * (y + z) == (x * y) + (x * z) + * a + (b * 1) + (b * (c - 1)) + * Identity Property: x * 1 == x + * a + b + (b * (c - 1)) + * + * Substitute a, b, and c: + * input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + * + * Since input_hi.hi + input_hi.lo == input_hi, we get this: + * input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1)) + */ + m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1); + } + /* m128 ^= XXH_swap64(m128 >> 64); */ + m128.low64 ^= XXH_swap64(m128.high64); + + { /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */ + XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2); + h128.high64 += m128.high64 * XXH_PRIME64_2; + + h128.low64 = XXH3_avalanche(h128.low64); + h128.high64 = XXH3_avalanche(h128.high64); + return h128; + } } +} + +/* + * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN + */ +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed) +{ + XXH_ASSERT(len <= 16); + { if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed); + if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed); + if (len) return XXH3_len_1to3_128b(input, len, secret, seed); + { XXH128_hash_t h128; + xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72); + xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88); + h128.low64 = XXH64_avalanche(seed ^ bitflipl); + h128.high64 = XXH64_avalanche( seed ^ bitfliph); + return h128; + } } +} + +/* + * A bit slower than XXH3_mix16B, but handles multiply by zero better. + */ +XXH_FORCE_INLINE XXH128_hash_t +XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2, + const xxh_u8* secret, XXH64_hash_t seed) +{ + acc.low64 += XXH3_mix16B (input_1, secret+0, seed); + acc.low64 ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8); + acc.high64 += XXH3_mix16B (input_2, secret+16, seed); + acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8); + return acc; +} + + +XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t +XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len, + const xxh_u8* XXH_RESTRICT secret, size_t secretSize, + XXH64_hash_t seed) +{ + XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize; + XXH_ASSERT(16 < len && len <= 128); + + { XXH128_hash_t acc; + acc.low64 = len * XXH_PRIME64_1; + acc.high64 = 0; + +#if XXH_SIZE_OPT >= 1 + { + /* Smaller, but slightly slower. 
 */
+        unsigned int i = (unsigned int)(len - 1) / 32;
+        do {
+            acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
+        } while (i-- != 0);
+    }
+#else
+    if (len > 32) {
+        if (len > 64) {
+            if (len > 96) {
+                acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
+            }
+            acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
+        }
+        acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
+    }
+    acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
+#endif
+    {   XXH128_hash_t h128;
+        h128.low64  = acc.low64 + acc.high64;
+        h128.high64 = (acc.low64    * XXH_PRIME64_1)
+                    + (acc.high64   * XXH_PRIME64_4)
+                    + ((len - seed) * XXH_PRIME64_2);
+        h128.low64  = XXH3_avalanche(h128.low64);
+        h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
+        return h128;
+    }
+    }
+}
+
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
+XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
+                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                       XXH64_hash_t seed)
+{
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
+    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
+
+    {   XXH128_hash_t acc;
+        unsigned i;
+        acc.low64 = len * XXH_PRIME64_1;
+        acc.high64 = 0;
+        /*
+         * We set `i` to offset + 32. We do this so that unchanged
+         * `len` can be used as upper bound. This reaches a sweet spot
+         * where both x86 and aarch64 get simple address generation (agen)
+         * and good codegen for the loop.
+         */
+        for (i = 32; i < 160; i += 32) {
+            acc = XXH128_mix32B(acc,
+                                input  + i - 32,
+                                input  + i - 16,
+                                secret + i - 32,
+                                seed);
+        }
+        acc.low64 = XXH3_avalanche(acc.low64);
+        acc.high64 = XXH3_avalanche(acc.high64);
+        /*
+         * NB: `i <= len` will duplicate the last 32 bytes if
+         * len % 32 was zero. This is an unfortunate necessity to keep
+         * the hash result stable.
+         */
+        for (i=160; i <= len; i += 32) {
+            acc = XXH128_mix32B(acc,
+                                input + i - 32,
+                                input + i - 16,
+                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
+                                seed);
+        }
+        /* last bytes */
+        acc = XXH128_mix32B(acc,
+                            input + len - 16,
+                            input + len - 32,
+                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
+                            (XXH64_hash_t)0 - seed);
+
+        {   XXH128_hash_t h128;
+            h128.low64  = acc.low64 + acc.high64;
+            h128.high64 = (acc.low64    * XXH_PRIME64_1)
+                        + (acc.high64   * XXH_PRIME64_4)
+                        + ((len - seed) * XXH_PRIME64_2);
+            h128.low64  = XXH3_avalanche(h128.low64);
+            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
+            return h128;
+        }
+    }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
+                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
+                            XXH3_f_accumulate f_acc,
+                            XXH3_f_scrambleAcc f_scramble)
+{
+    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
+
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
+
+    /* converge into final hash */
+    XXH_STATIC_ASSERT(sizeof(acc) == 64);
+    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+    {   XXH128_hash_t h128;
+        h128.low64  = XXH3_mergeAccs(acc,
+                                     secret + XXH_SECRET_MERGEACCS_START,
+                                     (xxh_u64)len * XXH_PRIME64_1);
+        h128.high64 = XXH3_mergeAccs(acc,
+                                     secret + secretSize
+                                            - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                                     ~((xxh_u64)len * XXH_PRIME64_2));
+        return h128;
+    }
+}
+
+/*
+ * It's important for performance that XXH3_hashLong() is not inlined.
+ */ +XXH_NO_INLINE XXH_PUREF XXH128_hash_t +XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; (void)secret; (void)secretLen; + return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_accumulate, XXH3_scrambleAcc); +} + +/* + * It's important for performance to pass @p secretLen (when it's static) + * to the compiler, so that it can properly optimize the vectorized loop. + * + * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE + * breaks -Og, this is XXH_NO_INLINE. + */ +XXH3_WITH_SECRET_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)seed64; + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen, + XXH3_accumulate, XXH3_scrambleAcc); +} + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len, + XXH64_hash_t seed64, + XXH3_f_accumulate f_acc, + XXH3_f_scrambleAcc f_scramble, + XXH3_f_initCustomSecret f_initSec) +{ + if (seed64 == 0) + return XXH3_hashLong_128b_internal(input, len, + XXH3_kSecret, sizeof(XXH3_kSecret), + f_acc, f_scramble); + { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE]; + f_initSec(secret, seed64); + return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret), + f_acc, f_scramble); + } +} + +/* + * It's important for performance that XXH3_hashLong is not inlined. + */ +XXH_NO_INLINE XXH128_hash_t +XXH3_hashLong_128b_withSeed(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen) +{ + (void)secret; (void)secretLen; + return XXH3_hashLong_128b_withSeed_internal(input, len, seed64, + XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret); +} + +typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t, + XXH64_hash_t, const void* XXH_RESTRICT, size_t); + +XXH_FORCE_INLINE XXH128_hash_t +XXH3_128bits_internal(const void* input, size_t len, + XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen, + XXH3_hashLong128_f f_hl128) +{ + XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN); + /* + * If an action is to be taken if `secret` conditions are not respected, + * it should be done here. + * For now, it's a contract pre-condition. + * Adding a check and a branch here would cost performance at every hash. + */ + if (len <= 16) + return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64); + if (len <= 128) + return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + if (len <= XXH3_MIDSIZE_MAX) + return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64); + return f_hl128(input, len, seed64, secret, secretLen); +} + + +/* === Public XXH128 API === */ + +/*! @ingroup XXH3_family */ +XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len) +{ + return XXH3_128bits_internal(input, len, 0, + XXH3_kSecret, sizeof(XXH3_kSecret), + XXH3_hashLong_128b_default); +} + +/*! 
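+ * Minimal usage sketch (the buffer names are illustrative; the secret must
+ * hold at least XXH3_SECRET_SIZE_MIN bytes):
+ * @code{.c}
+ * static unsigned char mySecret[XXH3_SECRET_SIZE_MIN];
+ * // ... fill mySecret with high-entropy bytes, e.g. via XXH3_generateSecret() ...
+ * XXH128_hash_t const h = XXH3_128bits_withSecret(data, size, mySecret, sizeof(mySecret));
+ * @endcode
+ *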
+ * @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
+{
+    return XXH3_128bits_internal(input, len, 0,
+                                 (const xxh_u8*)secret, secretSize,
+                                 XXH3_hashLong_128b_withSecret);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+    return XXH3_128bits_internal(input, len, seed,
+                                 XXH3_kSecret, sizeof(XXH3_kSecret),
+                                 XXH3_hashLong_128b_withSeed);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    if (len <= XXH3_MIDSIZE_MAX)
+        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
+{
+    return XXH3_128bits_withSeed(input, len, seed);
+}
+
+
+/* === XXH3 128-bit streaming === */
+#ifndef XXH_NO_STREAM
+/*
+ * All initialization and update functions are identical to the 64-bit streaming variant.
+ * The only difference is the finalization routine.
+ */
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
+{
+    return XXH3_64bits_reset(statePtr);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
+{
+    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
+{
+    return XXH3_64bits_reset_withSeed(statePtr, seed);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
+{
+    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
+{
+    return XXH3_64bits_update(state, input, len);
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
+{
+    const unsigned char* const secret = (state->extSecret == NULL) ?
state->customSecret : state->extSecret;
+    if (state->totalLen > XXH3_MIDSIZE_MAX) {
+        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
+        XXH3_digest_long(acc, state, secret);
+        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
+        {   XXH128_hash_t h128;
+            h128.low64  = XXH3_mergeAccs(acc,
+                                         secret + XXH_SECRET_MERGEACCS_START,
+                                         (xxh_u64)state->totalLen * XXH_PRIME64_1);
+            h128.high64 = XXH3_mergeAccs(acc,
+                                         secret + state->secretLimit + XXH_STRIPE_LEN
+                                                - sizeof(acc) - XXH_SECRET_MERGEACCS_START,
+                                         ~((xxh_u64)state->totalLen * XXH_PRIME64_2));
+            return h128;
+        }
+    }
+    /* len <= XXH3_MIDSIZE_MAX : short code */
+    if (state->seed)
+        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
+    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
+                                   secret, state->secretLimit + XXH_STRIPE_LEN);
+}
+#endif /* !XXH_NO_STREAM */
+/* 128-bit utility functions */
+
+#include <string.h>   /* memcmp, memcpy */
+
+/* return : 1 if equal, 0 if different */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
+{
+    /* note : XXH128_hash_t is compact, it has no padding byte */
+    return !(memcmp(&h1, &h2, sizeof(h1)));
+}
+
+/* This prototype is compatible with stdlib's qsort().
+ * @return : >0 if *h128_1 > *h128_2
+ *           <0 if *h128_1 < *h128_2
+ *           =0 if *h128_1 == *h128_2 */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
+{
+    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
+    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
+    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
+    /* note : bets that, in most cases, hash values are different */
+    if (hcmp) return hcmp;
+    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
+}
+
+
+/*====== Canonical representation ======*/
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API void
+XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
+{
+    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
+    if (XXH_CPU_LITTLE_ENDIAN) {
+        hash.high64 = XXH_swap64(hash.high64);
+        hash.low64  = XXH_swap64(hash.low64);
+    }
+    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
+    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t
+XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
+{
+    XXH128_hash_t h;
+    h.high64 = XXH_readBE64(src);
+    h.low64  = XXH_readBE64(src->digest + 8);
+    return h;
+}
+
+
+
+/* ==========================================
+ * Secret generators
+ * ==========================================
+ */
+#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
+{
+    XXH_writeLE64( dst,          XXH_readLE64(dst)          ^ h128.low64 );
+    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
+}
+
+/*!
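+ * Illustrative use (a sketch): derive a full-size secret from a short,
+ * low-entropy seed buffer, then hash with it:
+ * @code{.c}
+ * unsigned char secret[XXH3_SECRET_SIZE_MIN];
+ * if (XXH3_generateSecret(secret, sizeof(secret), "my application key", 18) == XXH_OK) {
+ *     XXH64_hash_t const h = XXH3_64bits_withSecret(data, size, secret, sizeof(secret));
+ *     (void)h;
+ * }
+ * @endcode
+ *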
+ * @ingroup XXH3_family */
+XXH_PUBLIC_API XXH_errorcode
+XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
+{
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(secretBuffer != NULL);
+    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+#else
+    /* production mode, assert() are disabled */
+    if (secretBuffer == NULL) return XXH_ERROR;
+    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+#endif
+
+    if (customSeedSize == 0) {
+        customSeed = XXH3_kSecret;
+        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
+    }
+#if (XXH_DEBUGLEVEL >= 1)
+    XXH_ASSERT(customSeed != NULL);
+#else
+    if (customSeed == NULL) return XXH_ERROR;
+#endif
+
+    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
+    {   size_t pos = 0;
+        while (pos < secretSize) {
+            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
+            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
+            pos += toCopy;
+    }   }
+
+    {   size_t const nbSeg16 = secretSize / 16;
+        size_t n;
+        XXH128_canonical_t scrambler;
+        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+        for (n=0; n<nbSeg16; n++) {
+            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
+            XXH3_combine16((char*)secretBuffer + n*16, h128);
+        }
+        /* last segment */
+        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
+    }
+    return XXH_OK;
+}
+
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API void
+XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
+{
+    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
+    XXH3_initCustomSecret(secret, seed);
+    XXH_ASSERT(secretBuffer != NULL);
+    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
+}
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_deps.h b/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_deps.h
new file mode 100644
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_deps.h
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* Need:
+ * NULL
+ * INT_MAX
+ * UINT_MAX
+ * ZSTD_memcpy()
+ * ZSTD_memset()
+ * ZSTD_memmove()
+ */
+#ifndef ZSTD_DEPS_COMMON
+#define ZSTD_DEPS_COMMON
+
+#include <limits.h>
+#include <stddef.h>
+#include <string.h>
+
+#if defined(__GNUC__) && __GNUC__ >= 4
+# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l))
+# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l))
+# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l))
+#else
+# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l))
+# define ZSTD_memmove(d,s,l) memmove((d),(s),(l))
+# define ZSTD_memset(p,v,l) memset((p),(v),(l))
+#endif
+
+#endif /* ZSTD_DEPS_COMMON */
+
+/* Need:
+ * ZSTD_malloc()
+ * ZSTD_free()
+ * ZSTD_calloc()
+ */
+#ifdef ZSTD_DEPS_NEED_MALLOC
+#ifndef ZSTD_DEPS_MALLOC
+#define ZSTD_DEPS_MALLOC
+
+#include <stdlib.h>
+
+#define ZSTD_malloc(s) malloc(s)
+#define ZSTD_calloc(n,s) calloc((n), (s))
+#define ZSTD_free(p) free((p))
+
+#endif /* ZSTD_DEPS_MALLOC */
+#endif /* ZSTD_DEPS_NEED_MALLOC */
+
+/*
+ * Provides 64-bit math support.
+ * Need:
+ * U64 ZSTD_div64(U64 dividend, U32 divisor)
+ */
+#ifdef ZSTD_DEPS_NEED_MATH64
+#ifndef ZSTD_DEPS_MATH64
+#define ZSTD_DEPS_MATH64
+
+#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor))
+
+#endif /* ZSTD_DEPS_MATH64 */
+#endif /* ZSTD_DEPS_NEED_MATH64 */
+
+/* Need:
+ * assert()
+ */
+#ifdef ZSTD_DEPS_NEED_ASSERT
+#ifndef ZSTD_DEPS_ASSERT
+#define ZSTD_DEPS_ASSERT
+
+#include <assert.h>
+
+#endif /* ZSTD_DEPS_ASSERT */
+#endif /* ZSTD_DEPS_NEED_ASSERT */
+
+/* Need:
+ * ZSTD_DEBUG_PRINT()
+ */
+#ifdef ZSTD_DEPS_NEED_IO
+#ifndef ZSTD_DEPS_IO
+#define ZSTD_DEPS_IO
+
+#include <stdio.h>
+#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__)
+
+#endif /* ZSTD_DEPS_IO */
+#endif /* ZSTD_DEPS_NEED_IO */
+
+/* Only requested when <stdint.h> is known to be present.
+ * Need:
+ * intptr_t
+ */
+#ifdef ZSTD_DEPS_NEED_STDINT
+#ifndef ZSTD_DEPS_STDINT
+#define ZSTD_DEPS_STDINT
+
+#include <stdint.h>
+
+#endif /* ZSTD_DEPS_STDINT */
+#endif /* ZSTD_DEPS_NEED_STDINT */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_internal.h b/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_internal.h
new file mode 100644
index 0000000..ecb9cfb
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_internal.h
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#include "compiler.h" +#include "cpu.h" +#include "mem.h" +#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ +#include "error_private.h" +#define ZSTD_STATIC_LINKING_ONLY +#include "../zstd.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#include "huf.h" +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +#include "xxhash.h" /* XXH_reset, update, digest */ +#ifndef ZSTD_NO_TRACE +# include "zstd_trace.h" +#else +# define ZSTD_TRACE 0 +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) +#define BOUNDED(min,val,max) (MAX(min,MIN(val,max))) + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */) /* for a non-null block */ +#define MIN_LITERALS_FOR_4_STREAMS 6 + +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define LitHufLog 11 +#define MaxLit ((1<= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. 
+ */ + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. */ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ + U16 litLength; + U16 mlBase; /* mlBase == matchLength - MINMATCH */ +} seqDef; + +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->mlBase + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + seqLen.litLength += 0x10000; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) { + seqLen.matchLength += 0x10000; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). 
+ *       similarly, before using `decompressedBound`, check for errors using:
+ *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
+ */
+typedef struct {
+    size_t nbBlocks;
+    size_t compressedSize;
+    unsigned long long decompressedBound;
+} ZSTD_frameSizeInfo;   /* decompress & legacy */
+
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
+int ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
+
+
+/* ZSTD_invalidateRepCodes() :
+ * ensures next compression will not use repcodes from previous block.
+ * Note : only works with regular variant;
+ *        do not use with extDict variant ! */
+void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
+
+
+typedef struct {
+    blockType_e blockType;
+    U32 lastBlock;
+    U32 origSize;
+} blockProperties_t;   /* declared here for decompress and fullbench */
+
+/*! ZSTD_getcBlockSize() :
+ *  Provides the size of compressed block from block header `src` */
+/*  Used by: decompress, fullbench */
+size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
+                          blockProperties_t* bpPtr);
+
+/*! ZSTD_decodeSeqHeaders() :
+ *  decode sequence header from src */
+/*  Used by: zstd_decompress_block, fullbench */
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                       const void* src, size_t srcSize);
+
+/**
+ * @returns true iff the CPU supports dynamic BMI2 dispatch.
+ */
+MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
+{
+    ZSTD_cpuid_t cpuid = ZSTD_cpuid();
+    return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_trace.h b/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_trace.h
new file mode 100644
index 0000000..da20534
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/common/zstd_trace.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_TRACE_H
+#define ZSTD_TRACE_H
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/* weak symbol support
+ * For now, enable conservatively:
+ * - Only GNUC
+ * - Only ELF
+ * - Only x86-64, i386 and aarch64
+ * Also, explicitly disable on platforms known not to work so they aren't
+ * forgotten in the future.
+ */
+#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && \
+    defined(__GNUC__) && defined(__ELF__) && \
+    (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) || defined(__aarch64__)) && \
+    !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \
+    !defined(__CYGWIN__) && !defined(_AIX)
+#  define ZSTD_HAVE_WEAK_SYMBOLS 1
+#else
+#  define ZSTD_HAVE_WEAK_SYMBOLS 0
+#endif
+#if ZSTD_HAVE_WEAK_SYMBOLS
+#  define ZSTD_WEAK_ATTR __attribute__((__weak__))
+#else
+#  define ZSTD_WEAK_ATTR
+#endif
+
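When ZSTD_HAVE_WEAK_SYMBOLS evaluates to 1, the trace entry points declared further down are emitted as weak symbols, so an application can supply strong definitions and observe every (de)compression call. An application-side sketch (illustrative, not part of the imported file; it assumes a program linked against this zstd build with tracing enabled, and it only uses the hooks and ZSTD_Trace fields declared below):

#include <stdio.h>
#include "zstd_trace.h"

ZSTD_TraceCtx ZSTD_trace_compress_begin(struct ZSTD_CCtx_s const* cctx)
{
    (void)cctx;
    return 1;   /* any non-zero value enables the matching end() callback */
}

void ZSTD_trace_compress_end(ZSTD_TraceCtx ctx, ZSTD_Trace const* trace)
{
    (void)ctx;
    fprintf(stderr, "zstd: %zu -> %zu bytes (streaming=%u)\n",
            trace->uncompressedSize, trace->compressedSize, trace->streaming);
}

+/* Only enable tracing when weak symbols are available.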
*/ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). + */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin( + struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin( + struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/clevels.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/clevels.h new file mode 100644 index 0000000..c18da46 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/clevels.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CLEVELS_H +#define ZSTD_CLEVELS_H + +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_compressionParameters */ +#include "../zstd.h" + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 + +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 3, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6 */ + { 21, 19, 20, 4, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 22, 23, 6, 5, 32, ZSTD_lazy2 }, /* level 12 */ + { 22, 22, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 3, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 17, 18, 5, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 
13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 16, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 16, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 16, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 16, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 16, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + + + +#endif /* ZSTD_CLEVELS_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/fse_compress.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/fse_compress.c new file mode 100644 index 0000000..1ce3cf1 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/fse_compress.c @@ -0,0 +1,625 @@ +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */
+
+/* **************************************************************
+*  Includes
+****************************************************************/
+#include "../common/compiler.h"
+#include "../common/mem.h"        /* U32, U16, etc. */
+#include "../common/debug.h"      /* assert, DEBUGLOG */
+#include "hist.h"                 /* HIST_count_wksp */
+#include "../common/bitstream.h"
+#define FSE_STATIC_LINKING_ONLY
+#include "../common/fse.h"
+#include "../common/error_private.h"
+#define ZSTD_DEPS_NEED_MALLOC
+#define ZSTD_DEPS_NEED_MATH64
+#include "../common/zstd_deps.h"  /* ZSTD_memset */
+#include "../common/bits.h"       /* ZSTD_highbit32 */
+
+
+/* **************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_isError ERR_isError
+
+
+/* **************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
+ * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct,
+                      const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
+                            void* workSpace, size_t wkspSize)
+{
+    U32 const tableSize = 1 << tableLog;
+    U32 const tableMask = tableSize - 1;
+    void* const ptr = ct;
+    U16* const tableU16 = ( (U16*) ptr) + 2;
+    void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
+    FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
+    U32 const step = FSE_TABLESTEP(tableSize);
+    U32 const maxSV1 = maxSymbolValue+1;
+
+    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */
+
+    U32 highThreshold = tableSize-1;
+
+    assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
+    if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
+    /* CTable header */
+    tableU16[-2] = (U16) tableLog;
+    tableU16[-1] = (U16) maxSymbolValue;
+    assert(tableLog < 16);   /* required for threshold strategy to work */
+
+    /* For explanations on how to distribute symbol values over the table :
+     * https://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
+
+    #ifdef __clang_analyzer__
+    ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize);   /* useless initialization, just to keep scan-build happy */
+    #endif
+
+    /* symbol start positions */
+    {   U32 u;
+        cumul[0] = 0;
+        for (u=1; u <= maxSV1; u++) {
+            if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
+                cumul[u] = cumul[u-1] + 1;
+                tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
+            } else {
+                assert(normalizedCounter[u-1] >= 0);
+                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+                assert(cumul[u] >= cumul[u-1]);  /* no overflow */
+        }   }
+        cumul[maxSV1] = (U16)(tableSize+1);
+    }
+
+    /* Spread symbols */
+    if (highThreshold == tableSize - 1) {
+        /* Case for no low prob count symbols. 
Lay down 8 bytes at a time
+         * to reduce branch misses since we are operating on a small block
+         */
+        BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+        {   U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                assert(n>=0);
+                pos += (size_t)n;
+            }
+        }
+        /* Spread symbols across the table. Lack of lowprob symbols means that
+         * we don't need variable sized inner loop, so we can unroll the loop and
+         * reduce branch misses.
+         */
+        {   size_t position = 0;
+            size_t s;
+            size_t const unroll = 2; /* Experimentally determined optimal unroll */
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableSymbol[uPosition] = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);   /* Must have initialized all positions */
+        }
+    } else {
+        U32 position = 0;
+        U32 symbol;
+        for (symbol=0; symbol<maxSV1; symbol++) {
+            int nbOccurrences;
+            int const freq = normalizedCounter[symbol];
+            for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
+                tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
+                position = (position + step) & tableMask;
+                while (position > highThreshold)
+                    position = (position + step) & tableMask;   /* Low proba area */
+        }   }
+        assert(position==0);  /* Must have initialized all positions */
+    }
+
+    /* Build table */
+    {   U32 u; for (u=0; u<tableSize; u++) {
+        FSE_FUNCTION_TYPE s = tableSymbol[u];   /* note : static analyzer may not understand tableSymbol is properly initialized */
+        tableU16[cumul[s]++] = (U16) (tableSize+u);   /* TableU16 : sorted by symbol order; gives next state value */
+    }   }
+
+    /* Build Symbol Transformation Table */
+    {   unsigned total = 0;
+        unsigned s;
+        for (s=0; s<=maxSymbolValue; s++) {
+            switch (normalizedCounter[s])
+            {
+            case  0:
+                /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
+                symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
+                break;
+
+            case -1:
+            case  1:
+                symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
+                assert(total <= INT_MAX);
+                symbolTT[s].deltaFindState = (int)(total - 1);
+                total ++;
+                break;
+            default :
+                assert(normalizedCounter[s] > 1);
+                {   U32 const maxBitsOut = tableLog - ZSTD_highbit32 ((U32)normalizedCounter[s]-1);
+                    U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
+                    symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
+                    symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+                    total += (unsigned)normalizedCounter[s];
+    }   }   }   }
+
+#if 0  /* debug : symbol costs */
+    DEBUGLOG(5, "\n --- table statistics : ");
+    {   U32 symbol;
+        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+            DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
+                     symbol, normalizedCounter[symbol],
+                     FSE_getMaxNbBits(symbolTT, symbol),
+                     (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
+    }   }
+#endif
+
+    return 0;
+}
+
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/*-**************************************************************
+*  FSE NCount encoding
+****************************************************************/
+size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
+{
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+                                   + 4 /* bitCount initialized at 4 */
+                                   + 2 /* first two symbols may use one additional bit each */) / 8)
+                                 + 1 /* round up to whole nb bytes */
+                                 + 2 /* additional two bytes for bitstream flush */;
+    return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? 
use default */ +} + +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; + + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = (int)tableLog+1; + + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3U << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */
+            bitStream += (U32)count << bitCount;
+            bitCount  += nbBits;
+            bitCount  -= (count<max);
+            previousIs0  = (count==1);
+            if (remaining<1) return ERROR(GENERIC);
+            while (remaining<threshold) { nbBits--; threshold>>=1; }
+        }
+        if (bitCount>16) {
+            if ((!writeIsSafe) && (out > oend - 2))
+                return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+            out[0] = (BYTE)bitStream;
+            out[1] = (BYTE)(bitStream>>8);
+            out += 2;
+            bitStream >>= 16;
+            bitCount -= 16;
+    }   }
+
+    if (remaining != 1)
+        return ERROR(GENERIC);  /* incorrect normalized distribution */
+    assert(symbol <= alphabetSize);
+
+    /* flush remaining bitStream */
+    if ((!writeIsSafe) && (out > oend - 2))
+        return ERROR(dstSize_tooSmall);   /* Buffer overflow */
+    out[0] = (BYTE)bitStream;
+    out[1] = (BYTE)(bitStream>>8);
+    out+= (bitCount+7) /8;
+
+    assert(out >= ostart);
+    return (size_t)(out-ostart);
+}
+
+
+size_t FSE_writeNCount (void* buffer, size_t bufferSize,
+                  const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported */
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported */
+
+    if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
+        return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
+
+    return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
+}
+
+
+/*-**************************************************************
+*  FSE Compression Code
+****************************************************************/
+
+/* provides the minimum logSize to safely represent a distribution */
+static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
+{
+    U32 minBitsSrc = ZSTD_highbit32((U32)(srcSize)) + 1;
+    U32 minBitsSymbols = ZSTD_highbit32(maxSymbolValue) + 2;
+    U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    return minBits;
+}
+
+unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
+{
+    U32 maxBitsSrc = ZSTD_highbit32((U32)(srcSize - 1)) - minus;
+    U32 tableLog = maxTableLog;
+    U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
+    assert(srcSize > 1); /* Not supported, RLE should be used instead */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (maxBitsSrc < tableLog) tableLog = maxBitsSrc;   /* Accuracy can be reduced */
+    if (minBits > tableLog) tableLog = minBits;   /* Need a minimum to safely represent all symbol values */
+    if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
+    if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
+    return tableLog;
+}
+
+unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
+{
+    return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
+}
+
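FSE_optimalTableLog() picks a table size and FSE_normalizeCount() (defined below) fits a histogram to it; both are public entry points declared in fse.h. A usage sketch (illustrative, not part of the patch; `histogram[]` stands for a byte-frequency table such as one produced by HIST_count, with maxSymbolValue <= 255):

#include <stdio.h>
#include "fse.h"

int demo_normalize(const unsigned histogram[], unsigned maxSymbolValue, size_t srcSize)
{
    short norm[256];   /* one slot per possible symbol value */
    unsigned const tableLog = FSE_optimalTableLog(0 /* 0 = default */, srcSize, maxSymbolValue);
    size_t const err = FSE_normalizeCount(norm, tableLog, histogram, srcSize,
                                          maxSymbolValue, /* useLowProbCount */ 1);
    if (FSE_isError(err)) return -1;
    /* norm[] now sums to 1<<tableLog; rare symbols may be -1 (low-probability marker) */
    printf("tableLog=%u\n", tableLog);
    return 0;
}

+/* Secondary normalization method.
+   To be used when primary method fails. 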
 */
+
+static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount)
+{
+    short const NOT_YET_ASSIGNED = -2;
+    U32 s;
+    U32 distributed = 0;
+    U32 ToDistribute;
+
+    /* Init */
+    U32 const lowThreshold = (U32)(total >> tableLog);
+    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
+
+    for (s=0; s<=maxSymbolValue; s++) {
+        if (count[s] == 0) {
+            norm[s]=0;
+            continue;
+        }
+        if (count[s] <= lowThreshold) {
+            norm[s] = lowProbCount;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+        if (count[s] <= lowOne) {
+            norm[s] = 1;
+            distributed++;
+            total -= count[s];
+            continue;
+        }
+
+        norm[s]=NOT_YET_ASSIGNED;
+    }
+    ToDistribute = (1 << tableLog) - distributed;
+
+    if (ToDistribute == 0)
+        return 0;
+
+    if ((total / ToDistribute) > lowOne) {
+        /* risk of rounding to zero */
+        lowOne = (U32)((total * 3) / (ToDistribute * 2));
+        for (s=0; s<=maxSymbolValue; s++) {
+            if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
+                norm[s] = 1;
+                distributed++;
+                total -= count[s];
+                continue;
+        }   }
+        ToDistribute = (1 << tableLog) - distributed;
+    }
+
+    if (distributed == maxSymbolValue+1) {
+        /* all values are pretty poor;
+           probably incompressible data (should have already been detected);
+           find max, then give all remaining points to max */
+        U32 maxV = 0, maxC = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > maxC) { maxV=s; maxC=count[s]; }
+        norm[maxV] += (short)ToDistribute;
+        return 0;
+    }
+
+    if (total == 0) {
+        /* all of the symbols were low enough for the lowOne or lowThreshold */
+        for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
+            if (norm[s] > 0) { ToDistribute--; norm[s]++; }
+        return 0;
+    }
+
+    {   U64 const vStepLog = 62 - tableLog;
+        U64 const mid = (1ULL << (vStepLog-1)) - 1;
+        U64 const rStep = ZSTD_div64((((U64)1<<vStepLog) * ToDistribute) + mid, (U32)total);   /* scale on remaining */
+        U64 tmpTotal = mid;
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (norm[s]==NOT_YET_ASSIGNED) {
+                U64 const end = tmpTotal + (count[s] * rStep);
+                U32 const sStart = (U32)(tmpTotal >> vStepLog);
+                U32 const sEnd = (U32)(end >> vStepLog);
+                U32 const weight = sEnd - sStart;
+                if (weight < 1)
+                    return ERROR(GENERIC);
+                norm[s] = (short)weight;
+                tmpTotal = end;
+    }   }   }
+
+    return 0;
+}
+
+size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
+                           const unsigned* count, size_t total,
+                           unsigned maxSymbolValue, unsigned useLowProbCount)
+{
+    /* Sanity checks */
+    if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
+    if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC);   /* Unsupported size */
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);   /* Unsupported size */
+    if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC);   /* Too small tableLog, compression potentially impossible */
+
+    {   static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
+        short const lowProbCount = useLowProbCount ? -1 : 1;
+        U64 const scale = 62 - tableLog;
+        U64 const step = ZSTD_div64((U64)1<<62, (U32)total);   /* <== here, one division ! 
 */
+        U64 const vStep = 1ULL<<(scale-20);
+        int stillToDistribute = 1<<tableLog;
+        unsigned s;
+        unsigned largest=0;
+        short largestP=0;
+        U32 lowThreshold = (U32)(total >> tableLog);
+
+        for (s=0; s<=maxSymbolValue; s++) {
+            if (count[s] == total) return 0;   /* rle special case */
+            if (count[s] == 0) { normalizedCounter[s]=0; continue; }
+            if (count[s] <= lowThreshold) {
+                normalizedCounter[s] = lowProbCount;
+                stillToDistribute--;
+            } else {
+                short proba = (short)((count[s]*step) >> scale);
+                if (proba<8) {
+                    U64 restToBeat = vStep * rtbTable[proba];
+                    proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
+                }
+                if (proba > largestP) { largestP=proba; largest=s; }
+                normalizedCounter[s] = proba;
+                stillToDistribute -= proba;
+        }   }
+        if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
+            /* corner case, need another normalization method */
+            size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount);
+            if (FSE_isError(errorCode)) return errorCode;
+        }
+        else normalizedCounter[largest] += (short)stillToDistribute;
+    }
+
+#if 0
+    {   /* Print Table (debug) */
+        U32 s;
+        U32 nTotal = 0;
+        for (s=0; s<=maxSymbolValue; s++)
+            RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
+        for (s=0; s<=maxSymbolValue; s++)
+            nTotal += abs(normalizedCounter[s]);
+        if (nTotal != (1U< FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) {  /* test bit 2 */
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    /* 2 or 4 encoding per loop */
+    while ( ip>istart ) {
+
+        FSE_encodeSymbol(&bitC, &CState2, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 )   /* this test must be static */
+            FSE_FLUSHBITS(&bitC);
+
+        FSE_encodeSymbol(&bitC, &CState1, *--ip);
+
+        if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) {  /* this test must be static */
+            FSE_encodeSymbol(&bitC, &CState2, *--ip);
+            FSE_encodeSymbol(&bitC, &CState1, *--ip);
+        }
+
+        FSE_FLUSHBITS(&bitC);
+    }
+
+    FSE_flushCState(&bitC, &CState2);
+    FSE_flushCState(&bitC, &CState1);
+    return BIT_closeCStream(&bitC);
+}
+
+size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
+                           const void* src, size_t srcSize,
+                           const FSE_CTable* ct)
+{
+    unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
+
+    if (fast)
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
+    else
+        return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
+}
+
+
+size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
+
+#endif   /* FSE_COMMONDEFS_ONLY */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/hist.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/hist.c
new file mode 100644
index 0000000..e2fb431
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/hist.c
@@ -0,0 +1,181 @@
+/* ******************************************************************
+ * hist : Histogram functions
+ * part of Finite State Entropy project
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * You can contact the author at :
+ * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+ * - Public forum : https://groups.google.com/forum/#!forum/lz4c
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */
+
+/* --- dependencies --- */
+#include "../common/mem.h"             /* U32, BYTE, etc. */
+#include "../common/debug.h"           /* assert, DEBUGLOG */
+#include "../common/error_private.h"   /* ERROR */
+#include "hist.h"
+
+
+/* --- Error management --- */
+unsigned HIST_isError(size_t code) { return ERR_isError(code); }
+
+/*-**************************************************************
+ *  Histogram functions
+ ****************************************************************/
+unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
+                           const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* const end = ip + srcSize;
+    unsigned maxSymbolValue = *maxSymbolValuePtr;
+    unsigned largestCount=0;
+
+    ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
+    if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
+
+    while (ip<end) count[*ip++]++;
+
+    while (!count[maxSymbolValue]) maxSymbolValue--;
+    *maxSymbolValuePtr = maxSymbolValue;
+
+    {   U32 s;
+        for (s=0; s<=maxSymbolValue; s++)
+            if (count[s] > largestCount) largestCount = count[s];
+    }
+
+    return largestCount;
+}
+
+typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e;
+
+/* HIST_count_parallel_wksp() :
+ * store histogram into 4 intermediate tables, recombined at the end.
+ * this design makes better use of OoO cpus,
+ * and is noticeably faster when some values are heavily repeated.
+ * But it needs some additional workspace for intermediate tables.
+ * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32.
+ * @return : largest histogram frequency,
+ *           or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */
+static size_t HIST_count_parallel_wksp(
+                                unsigned* count, unsigned* maxSymbolValuePtr,
+                                const void* source, size_t sourceSize,
+                                HIST_checkInput_e check,
+                                U32* const workSpace)
+{
+    const BYTE* ip = (const BYTE*)source;
+    const BYTE* const iend = ip+sourceSize;
+    size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count);
+    unsigned max=0;
+    U32* const Counting1 = workSpace;
+    U32* const Counting2 = Counting1 + 256;
+    U32* const Counting3 = Counting2 + 256;
+    U32* const Counting4 = Counting3 + 256;
+
+    /* safety checks */
+    assert(*maxSymbolValuePtr <= 255);
+    if (!sourceSize) {
+        ZSTD_memset(count, 0, countSize);
+        *maxSymbolValuePtr = 0;
+        return 0;
+    }
+    ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned));
+
+    /* by stripes of 16 bytes */
+    {   U32 cached = MEM_read32(ip); ip += 4;
+        while (ip < iend-15) {
+            U32 c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+            c = cached; cached = MEM_read32(ip); ip += 4;
+            Counting1[(BYTE) c     ]++;
+            Counting2[(BYTE)(c>>8) ]++;
+            Counting3[(BYTE)(c>>16)]++;
+            Counting4[       c>>24 ]++;
+        }
+        ip-=4;
+    }
+
+    /* finish last symbols */
+    while (ip<iend) Counting1[*ip++]++;
+
+    {   U32 s;
+        for (s=0; s<256; s++) {
+            Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
+            if (Counting1[s] > max) max = Counting1[s];
+    }   }
+
+    {   unsigned maxSymbolValue = 255;
+        while (!Counting1[maxSymbolValue]) maxSymbolValue--;
+        if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall);
+        *maxSymbolValuePtr = maxSymbolValue;
+        ZSTD_memmove(count, Counting1, countSize);   /* in case count & Counting1 are overlapping */
+    }
+    return (size_t)max;
+}
+
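HIST_count_simple() above needs no workspace, which makes it the easiest entry point to demonstrate. A usage sketch (illustrative only; hist.h is an internal header, so this assumes a build that can include it):

#include <stdio.h>
#include "hist.h"

int main(void)
{
    const unsigned char data[] = "abracadabra";
    unsigned count[256];
    unsigned maxSymbolValue = 255;   /* in/out: shrinks to the largest byte present */
    unsigned const largest =
        HIST_count_simple(count, &maxSymbolValue, data, sizeof(data) - 1);
    /* 'a' occurs 5 times, so largest == 5; maxSymbolValue becomes 'r' (114) */
    printf("largest=%u maxSymbolValue=%u\n", largest, maxSymbolValue);
    return 0;
}

+/* HIST_countFast_wksp() :
+ * Same as HIST_countFast(), but using an externally 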
provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/hist.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/hist.h new file mode 100644 index 0000000..887896b --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/hist.h @@ -0,0 +1,75 @@ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +#include "../common/zstd_deps.h" /* size_t */ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. 
+ * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. + * Note this function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/huf_compress.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/huf_compress.c new file mode 100644 index 0000000..ea00072 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/huf_compress.c @@ -0,0 +1,1464 @@ +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ +#include "../common/compiler.h" +#include "../common/bitstream.h" +#include "hist.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +#include "../common/fse.h" /* header compression */ +#include "../common/huf.h" +#include "../common/error_private.h" +#include "../common/bits.h" /* ZSTD_highbit32 */ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Required declarations +****************************************************************/ +typedef struct nodeElt_s { + U32 count; + U16 parent; + BYTE byte; + BYTE nbBits; +} nodeElt; + + +/* ************************************************************** +* Debug Traces +****************************************************************/ + +#if DEBUGLEVEL >= 2 + +static size_t showU32(const U32* arr, size_t size) +{ + size_t u; + for (u=0; u= add) { + assert(add < align); + assert(((size_t)aligned & mask) == 0); + *workspaceSizePtr -= add; + return aligned; + } else { + *workspaceSizePtr = 0; + return NULL; + } +} + + +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
+ */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 + +typedef struct { + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; + unsigned count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; +} HUF_CompressWeightsWksp; + +static size_t +HUF_compressWeights(void* dst, size_t dstSize, + const void* weightTable, size_t wtSize, + void* workspace, size_t workspaceSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + unsigned maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32)); + + if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC); + + /* init conditions */ + if (wtSize <= 1) return 0; /* Not compressible */ + + /* Scan input and build symbol stats */ + { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */ + if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return (size_t)(op-ostart); +} + +static size_t HUF_getNbBits(HUF_CElt elt) +{ + return elt & 0xFF; +} + +static size_t HUF_getNbBitsFast(HUF_CElt elt) +{ + return elt; +} + +static size_t HUF_getValue(HUF_CElt elt) +{ + return elt & ~(size_t)0xFF; +} + +static size_t HUF_getValueFast(HUF_CElt elt) +{ + return elt; +} + +static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits) +{ + assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX); + *elt = nbBits; +} + +static void HUF_setValue(HUF_CElt* elt, size_t value) +{ + size_t const nbBits = HUF_getNbBits(*elt); + if (nbBits > 0) { + assert((value >> nbBits) == 0); + *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits); + } +} + +HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable) +{ + HUF_CTableHeader header; + ZSTD_memcpy(&header, ctable, sizeof(header)); + return header; +} + +static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue) +{ + HUF_CTableHeader header; + HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header)); + ZSTD_memset(&header, 0, sizeof(header)); + assert(tableLog < 256); + header.tableLog = (BYTE)tableLog; + assert(maxSymbolValue < 256); + header.maxSymbolValue = (BYTE)maxSymbolValue; + ZSTD_memcpy(ctable, &header, sizeof(header)); +} + +typedef struct { + HUF_CompressWeightsWksp wksp; + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; +} HUF_WriteCTableWksp; + +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, + const 
HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
+                            void* workspace, size_t workspaceSize)
+{
+    HUF_CElt const* const ct = CTable + 1;
+    BYTE* op = (BYTE*)dst;
+    U32 n;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
+
+    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
+
+    assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
+    assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
+
+    /* check conditions */
+    if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
+
+    /* convert to weight */
+    wksp->bitsToWeight[0] = 0;
+    for (n=1; n<huffLog+1; n++)
+        wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
+    for (n=0; n<maxSymbolValue; n++)
+        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
+
+    /* attempt weights compression by FSE */
+    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
+    {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
+        if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
+            op[0] = (BYTE)hSize;
+            return hSize+1;
+    }   }
+
+    /* write raw values as 4-bits (max : 15) */
+    if (maxSymbolValue > (256-128)) return ERROR(GENERIC);   /* should not happen : likely means source cannot be compressed */
+    if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall);   /* not enough space within dst buffer */
+    op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
+    wksp->huffWeight[maxSymbolValue] = 0;   /* to be sure it doesn't cause msan issue in final combination */
+    for (n=0; n<maxSymbolValue; n+=2)
+        op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
+    return ((maxSymbolValue+1)/2) + 1;
+}
+
+
+size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights)
+{
+    BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];   /* init not required, even though some static analyzer may complain */
+    U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];  /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    U32 nbSymbols = 0;
+    HUF_CElt* const ct = CTable + 1;
+
+    /* get symbol weights */
+    CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
+    *hasZeroWeights = (rankVal[0] > 0);
+
+    /* check result */
+    if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
+
+    *maxSymbolValuePtr = nbSymbols - 1;
+
+    HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
+
+    /* Prepare base value per rank */
+    {   U32 n, nextRankStart = 0;
+        for (n=1; n<=tableLog; n++) {
+            U32 curr = nextRankStart;
+            nextRankStart += (rankVal[n] << (n-1));
+            rankVal[n] = curr;
+    }   }
+
+    /* fill nbBits */
+    {   U32 n; for (n=0; n<nbSymbols; n++) {
+            const U32 w = huffWeight[n];
+            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
+    }   }
+
+    /* fill val */
+    {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
+        U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
+        /* determine starting value per rank */
+        valPerRank[tableLog+1] = 0;   /* for w==0 */
+        {   U16 min = 0;
+            U32 n; for (n=HUF_TABLELOG_MAX; n>0; n--) { /* start at n=tablelog <-> w=1 */
+                valPerRank[n] = min;      /* get starting value within each rank */
+                min += nbPerRank[n];
+                min >>= 1;
+        }   }
+        /* assign value within rank, symbol order */
+        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
+    }
+
+    return readSize;
+}
+
+U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
+{
+    const HUF_CElt* const ct = CTable + 1;
+    assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+    if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
+        return 0;
+    return (U32)HUF_getNbBits(ct[symbolValue]);
+}
+
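The nbPerRank/valPerRank walk in HUF_readCTable() is the canonical-code assignment: codes of the same length are consecutive, and each shorter rank starts at half of the value rolled up from the rank below it. A self-contained toy version of that walk (illustrative, independent of zstd's types; all names here are mine):

#include <stdio.h>

/* Assign canonical code values from code lengths, mirroring the
 * nbPerRank/valPerRank walk in HUF_readCTable. lengths[] uses 0 for
 * absent symbols; maxBits is the longest code length in use (< 16). */
static void assign_canonical(const unsigned char* lengths, unsigned n,
                             unsigned maxBits, unsigned* codes)
{
    unsigned nbPerRank[16] = {0};
    unsigned valPerRank[16] = {0};
    unsigned s, b;
    for (s = 0; s < n; s++) nbPerRank[lengths[s]]++;
    {   unsigned min = 0;
        for (b = maxBits; b > 0; b--) {   /* from longest to shortest rank */
            valPerRank[b] = min;          /* starting value within this rank */
            min += nbPerRank[b];
            min >>= 1;                    /* one bit shorter => half the range */
    }   }
    for (s = 0; s < n; s++)
        if (lengths[s]) codes[s] = valPerRank[lengths[s]]++;
}

int main(void)
{
    const unsigned char len[4] = { 1, 2, 3, 3 };  /* a Kraft-complete length set */
    unsigned code[4];
    assign_canonical(len, 4, 3, code);
    /* prints "1 1 0 1": with this convention the codes read '1', '01',
     * '000', '001', a valid prefix-free set (longer codes get lower values) */
    printf("%u %u %u %u\n", code[0], code[1], code[2], code[3]);
    return 0;
}

+/**
+ * HUF_setMaxHeight():
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
+ *
+ * It attempts to convert all nodes with nbBits > @targetNbBits
+ * to employ @targetNbBits instead. 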
+
+/**
+ * HUF_setMaxHeight():
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
+ *
+ * It attempts to convert all nodes with nbBits > @targetNbBits
+ * to employ @targetNbBits instead. Then it adjusts the tree
+ * so that it remains a valid canonical Huffman tree.
+ *
+ * @pre               The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits == huffNode[lastNonNull].nbBits.
+ * @post              The sum of the ranks of each symbol == 2^largestBits,
+ *                    where largestBits is the return value (expected <= targetNbBits).
+ * @param huffNode    The Huffman tree modified in place to enforce targetNbBits.
+ *                    It's presumed sorted, from most frequent to rarest symbol.
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
+ * @param targetNbBits The allowed number of bits, which the Huffman tree
+ *                    may not respect. After this function the Huffman tree will
+ *                    respect targetNbBits.
+ * @return            The maximum number of bits of the Huffman tree after adjustment.
+ */
+static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
+{
+    const U32 largestBits = huffNode[lastNonNull].nbBits;
+    /* early exit : no elt > targetNbBits, so the tree is already valid. */
+    if (largestBits <= targetNbBits) return largestBits;
+
+    DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
+
+    /* there are several too large elements (at least >= 2) */
+    {   int totalCost = 0;
+        const U32 baseCost = 1 << (largestBits - targetNbBits);
+        int n = (int)lastNonNull;
+
+        /* Adjust any ranks > targetNbBits to targetNbBits.
+         * Compute totalCost, which is how far over 2^largestBits the sum of the
+         * ranks is after adjusting the offending ranks.
+         */
+        while (huffNode[n].nbBits > targetNbBits) {
+            totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
+            huffNode[n].nbBits = (BYTE)targetNbBits;
+            n--;
+        }
+        /* n stops at huffNode[n].nbBits <= targetNbBits */
+        assert(huffNode[n].nbBits <= targetNbBits);
+        /* n ends at the index of the smallest symbol using < targetNbBits */
+        while (huffNode[n].nbBits == targetNbBits) --n;
+
+        /* renorm totalCost from 2^largestBits to 2^targetNbBits
+         * note : totalCost is necessarily a multiple of baseCost */
+        assert(((U32)totalCost & (baseCost - 1)) == 0);
+        totalCost >>= (largestBits - targetNbBits);
+        assert(totalCost > 0);
+
+        /* repay normalized cost */
+        {   U32 const noSymbol = 0xF0F0F0F0;
+            U32 rankLast[HUF_TABLELOG_MAX+2];
+
+            /* Get pos of last (smallest = lowest cum. count) symbol per rank */
+            ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
+            {   U32 currentNbBits = targetNbBits;
+                int pos;
+                for (pos=n ; pos >= 0; pos--) {
+                    if (huffNode[pos].nbBits >= currentNbBits) continue;
+                    currentNbBits = huffNode[pos].nbBits;   /* < targetNbBits */
+                    rankLast[targetNbBits-currentNbBits] = (U32)pos;
+            }   }
+
+            while (totalCost > 0) {
+                /* Try to reduce the next power of 2 above totalCost because we
+                 * gain back half the rank.
+                 */
+                U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
+                for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
+                    U32 const highPos = rankLast[nBitsToDecrease];
+                    U32 const lowPos = rankLast[nBitsToDecrease-1];
+                    if (highPos == noSymbol) continue;
+                    /* Decrease highPos if no symbols of lowPos or if it is
+                     * not cheaper to remove 2 lowPos than highPos.
+                     */
+                    if (lowPos == noSymbol) break;
+                    {   U32 const highTotal = huffNode[highPos].count;
+                        U32 const lowTotal = 2 * huffNode[lowPos].count;
+                        if (highTotal <= lowTotal) break;
+                }   }
+                /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
+                assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
+                /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
+                while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
+                    nBitsToDecrease++;
+                assert(rankLast[nBitsToDecrease] != noSymbol);
+                /* Increase the number of bits to gain back half the rank cost. */
+                totalCost -= 1 << (nBitsToDecrease-1);
+                huffNode[rankLast[nBitsToDecrease]].nbBits++;
+
+                /* Fix up the new rank.
+                 * If the new rank was empty, this symbol is now its smallest.
+                 * Otherwise, this symbol will be the largest in the new rank so no adjustment.
+                 */
+                if (rankLast[nBitsToDecrease-1] == noSymbol)
+                    rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
+                /* Fix up the old rank.
+                 * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
+                 * it must be the only symbol in its rank, so the old rank now has no symbols.
+                 * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
+                 * the smallest node in the rank. If the previous position belongs to a different rank,
+                 * then the rank is now empty.
+                 */
+                if (rankLast[nBitsToDecrease] == 0)   /* special case, reached largest symbol */
+                    rankLast[nBitsToDecrease] = noSymbol;
+                else {
+                    rankLast[nBitsToDecrease]--;
+                    if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
+                        rankLast[nBitsToDecrease] = noSymbol;   /* this rank is now empty */
+                }
+            }   /* while (totalCost > 0) */
+
+            /* If we've removed too much weight, then we have to add it back.
+             * To avoid overshooting again, we only adjust the smallest rank.
+             * We take the largest nodes from the lowest rank 0 and move them
+             * to rank 1. There's guaranteed to be enough rank 0 symbols because
+             * TODO.
+             */
+            while (totalCost < 0) {   /* Sometimes, cost correction overshoots */
+                /* special case : no rank 1 symbol (using targetNbBits-1);
+                 * let's create one from largest rank 0 (using targetNbBits).
+                 */
+                if (rankLast[1] == noSymbol) {
+                    while (huffNode[n].nbBits == targetNbBits) n--;
+                    huffNode[n+1].nbBits--;
+                    assert(n >= 0);
+                    rankLast[1] = (U32)(n+1);
+                    totalCost++;
+                    continue;
+                }
+                huffNode[ rankLast[1] + 1 ].nbBits--;
+                rankLast[1]++;
+                totalCost ++;
+            }
+        }   /* repay normalized cost */
+    }   /* there are several too large elements (at least >= 2) */
+
+    return targetNbBits;
+}
+
+typedef struct {
+    U16 base;
+    U16 curr;
+} rankPos;
+
+typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
+
+/* Number of buckets available for HUF_sort() */
+#define RANK_POSITION_TABLE_SIZE 192
+
+typedef struct {
+    huffNodeTable huffNodeTbl;
+    rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
+} HUF_buildCTable_wksp_tables;
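Editor's illustration: HUF_setMaxHeight above reasons entirely in terms of a scaled Kraft sum. A complete depth-L tree satisfies sum(2^(L - nbBits)) == 2^L, capping over-long codes pushes the sum above 2^targetNbBits, and each time a symbol's nbBits grows by one its contribution halves. A minimal standalone replay of that bookkeeping (all names invented here; not part of the imported sources):

#include <assert.h>
#include <stdio.h>

/* Scaled Kraft sum for a depth limit L: each depth-d leaf contributes 2^(L-d). */
static unsigned kraft(const unsigned nbBits[], int n, unsigned L)
{
    unsigned sum = 0;
    int i;
    for (i = 0; i < n; i++) sum += 1u << (L - nbBits[i]);
    return sum;
}

int main(void)
{
    /* A valid depth-5 tree: 16+8+4+2+1+1 == 32 == 2^5. */
    unsigned nbBits[6] = {1, 2, 3, 4, 5, 5};
    unsigned const target = 3;
    int totalCost;
    assert(kraft(nbBits, 6, 5) == 1u << 5);

    /* Cap everything at target bits, as the first loop of HUF_setMaxHeight does. */
    nbBits[3] = nbBits[4] = nbBits[5] = target;
    totalCost = (int)kraft(nbBits, 6, target) - (1 << target);   /* 10 - 8 = 2 */
    printf("cost to repay: %d\n", totalCost);

    /* Repay: lengthening the depth-1 symbol to depth 2 halves its contribution
     * from 4 to 2, i.e. repays 1 << (nBitsToDecrease-1) with nBitsToDecrease=2. */
    nbBits[0] += 1;
    totalCost -= 1 << (2 - 1);
    assert(totalCost == 0 && kraft(nbBits, 6, target) == 1u << target);
    printf("tree is complete again at depth %u\n", target);
    return 0;
}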
+/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
+ * Strategy is to use as many buckets as possible for representing distinct
+ * counts while using the remainder to represent all "large" counts.
+ *
+ * To satisfy this requirement for 192 buckets, we can do the following:
+ * Let buckets 0-166 represent distinct counts of [0, 166].
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
+ */
+#define RANK_POSITION_MAX_COUNT_LOG 32
+#define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
+
+/* Return the appropriate bucket index for a given count. See definition of
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
+ */
+static U32 HUF_getIndex(U32 const count) {
+    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
+        ? count
+        : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
+}
+
+/* Helper swap function for HUF_quickSortPartition() */
+static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
+    nodeElt tmp = *a;
+    *a = *b;
+    *b = tmp;
+}
+
+/* Returns 0 if the huffNode array is not sorted by descending count */
+MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
+    U32 i;
+    for (i = 1; i < maxSymbolValue1; ++i) {
+        if (huffNode[i].count > huffNode[i-1].count) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Insertion sort by descending order */
+HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
+    int i;
+    int const size = high-low+1;
+    huffNode += low;
+    for (i = 1; i < size; ++i) {
+        nodeElt const key = huffNode[i];
+        int j = i - 1;
+        while (j >= 0 && huffNode[j].count < key.count) {
+            huffNode[j + 1] = huffNode[j];
+            j--;
+        }
+        huffNode[j + 1] = key;
+    }
+}
+
+/* Pivot helper function for quicksort. */
+static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
+    /* Simply select rightmost element as pivot. "Better" selectors like
+     * median-of-three don't experimentally appear to have any benefit.
+     */
+    U32 const pivot = arr[high].count;
+    int i = low - 1;
+    int j = low;
+    for ( ; j < high; j++) {
+        if (arr[j].count > pivot) {
+            i++;
+            HUF_swapNodes(&arr[i], &arr[j]);
+        }
+    }
+    HUF_swapNodes(&arr[i + 1], &arr[high]);
+    return i + 1;
+}
+
+/* Classic quicksort by descending with partially iterative calls
+ * to reduce worst case callstack size.
+ */
+static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
+    int const kInsertionSortThreshold = 8;
+    if (high - low < kInsertionSortThreshold) {
+        HUF_insertionSort(arr, low, high);
+        return;
+    }
+    while (low < high) {
+        int const idx = HUF_quickSortPartition(arr, low, high);
+        if (idx - low < high - idx) {
+            HUF_simpleQuickSort(arr, low, idx - 1);
+            low = idx + 1;
+        } else {
+            HUF_simpleQuickSort(arr, idx + 1, high);
+            high = idx - 1;
+        }
+    }
+}
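Editor's illustration: the bucketing rule is easier to see with the constants inlined. This standalone re-derivation of HUF_getIndex (invented names; not part of the imported sources) shows small counts owning one bucket each while large counts share one bucket per power of two:

#include <stdio.h>
#include <stdint.h>

#define TABLE_SIZE 192                                   /* RANK_POSITION_TABLE_SIZE */
#define LOG_BUCKETS_BEGIN ((TABLE_SIZE - 1) - 32 - 1)    /* == 158 */

/* Position of the highest set bit, same contract as ZSTD_highbit32(). */
static uint32_t highbit32(uint32_t v) { uint32_t r = 0; while (v >>= 1) r++; return r; }

static uint32_t bucket_index(uint32_t count)
{
    uint32_t const cutoff = LOG_BUCKETS_BEGIN + highbit32(LOG_BUCKETS_BEGIN);
    return (count < cutoff)
        ? count                                   /* one bucket per distinct small count */
        : highbit32(count) + LOG_BUCKETS_BEGIN;   /* large counts share a log2 bucket */
}

int main(void)
{
    uint32_t const samples[] = { 0, 1, 42, 150, 200, 255, 1000, 1u << 20 };
    size_t i;
    for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++)
        printf("count %8u -> bucket %u\n", (unsigned)samples[i], (unsigned)bucket_index(samples[i]));
    /* 200 and 255 collide in one bucket; 42 and 150 keep buckets of their own. */
    return 0;
}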
+/**
+ * HUF_sort():
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
+ *
+ * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
+ *                            Must have (maxSymbolValue + 1) entries.
+ * @param[in]  count          Histogram of the symbols.
+ * @param[in]  maxSymbolValue Maximum symbol value.
+ * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
+ */
+static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
+    U32 n;
+    U32 const maxSymbolValue1 = maxSymbolValue+1;
+
+    /* Compute base and set curr to base.
+     * For symbol s let lowerRank = HUF_getIndex(count[s]) and rank = lowerRank + 1.
+     * See HUF_getIndex to see bucketing strategy.
+     * We attribute each symbol to lowerRank's base value, because we want to know where
+     * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
+     */
+    ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 lowerRank = HUF_getIndex(count[n]);
+        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
+        rankPosition[lowerRank].base++;
+    }
+
+    assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    /* Set up the rankPosition table */
+    for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
+        rankPosition[n-1].base += rankPosition[n].base;
+        rankPosition[n-1].curr = rankPosition[n-1].base;
+    }
+
+    /* Insert each symbol into its appropriate bucket, setting up the rankPosition table. */
+    for (n = 0; n < maxSymbolValue1; ++n) {
+        U32 const c = count[n];
+        U32 const r = HUF_getIndex(c) + 1;
+        U32 const pos = rankPosition[r].curr++;
+        assert(pos < maxSymbolValue1);
+        huffNode[pos].count = c;
+        huffNode[pos].byte = (BYTE)n;
+    }
+
+    /* Sort each bucket. */
+    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
+        int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
+        U32 const bucketStartIdx = rankPosition[n].base;
+        if (bucketSize > 1) {
+            assert(bucketStartIdx < maxSymbolValue1);
+            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
+        }
+    }
+
+    assert(HUF_isSorted(huffNode, maxSymbolValue1));
+}
+
+
+/** HUF_buildCTable_wksp() :
+ *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
+ *  `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables).
+ */
+#define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
+
+/* HUF_buildTree():
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
+ *
+ * @param huffNode       The array sorted by HUF_sort(). Builds the Huffman tree in this array.
+ * @param maxSymbolValue The maximum symbol value.
+ * @return               The smallest node in the Huffman tree (by count).
+ */
+static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
+{
+    nodeElt* const huffNode0 = huffNode - 1;
+    int nonNullRank;
+    int lowS, lowN;
+    int nodeNb = STARTNODE;
+    int n, nodeRoot;
+    DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
+    /* init for parents */
+    nonNullRank = (int)maxSymbolValue;
+    while(huffNode[nonNullRank].count == 0) nonNullRank--;
+    lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
+    huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
+    huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb;
+    nodeNb++; lowS-=2;
+    for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
+    huffNode0[0].count = (U32)(1U<<31);   /* fake entry, strong barrier */
+
+    /* create parents */
+    while (nodeNb <= nodeRoot) {
+        int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
+        huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
+        huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb;
+        nodeNb++;
+    }
+
+    /* distribute weights (unlimited tree height) */
+    huffNode[nodeRoot].nbBits = 0;
+    for (n=nodeRoot-1; n>=STARTNODE; n--)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+    for (n=0; n<=nonNullRank; n++)
+        huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
+
+    DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
+
+    return nonNullRank;
+}
+
+/**
+ * HUF_buildCTableFromTree():
+ * Build the CTable given the Huffman tree in huffNode.
+ *
+ * @param[out] CTable         The output Huffman CTable.
+ * @param      huffNode       The Huffman tree.
+ * @param      nonNullRank    The last and smallest node in the Huffman tree.
+ * @param      maxSymbolValue The maximum symbol value.
+ * @param      maxNbBits      The exact maximum number of bits used in the Huffman tree.
+ */
+static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
+{
+    HUF_CElt* const ct = CTable + 1;
+    /* fill result into ctable (val, nbBits) */
+    int n;
+    U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
+    U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
+    int const alphabetSize = (int)(maxSymbolValue + 1);
+    for (n=0; n<=nonNullRank; n++)
+        nbPerRank[huffNode[n].nbBits]++;
+    /* determine starting value per rank */
+    {   U16 min = 0;
+        for (n=(int)maxNbBits; n>0; n--) {
+            valPerRank[n] = min;      /* get starting value within each rank */
+            min += nbPerRank[n];
+            min >>= 1;
+    }   }
+    for (n=0; n<alphabetSize; n++)
+        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
+    for (n=0; n<alphabetSize; n++)
+        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
+
+    HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
+}
+
+size_t
+HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
+                     void* workSpace, size_t wkspSize)
+{
+    HUF_buildCTable_wksp_tables* const wksp_tables =
+        (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
+    nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
+    nodeElt* const huffNode = huffNode0+1;
+    int nonNullRank;
+
+    HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
+
+    DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
+
+    /* safety checks */
+    if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
+        return ERROR(workSpace_tooSmall);
+    if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
+    if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
+        return ERROR(maxSymbolValue_tooLarge);
+    ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
+
+    /* sort, decreasing order */
+    HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
+    DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
+
+    /* build tree */
+    nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+
+    /* determine and enforce maxTableLog */
+    maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
+    if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
+
+    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+
+    return maxNbBits;
+}
+
+size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
+{
+    HUF_CElt const* ct = CTable + 1;
+    size_t nbBits = 0;
+    int s;
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        nbBits += HUF_getNbBits(ct[s]) * count[s];
+    }
+    return nbBits >> 3;
+}
+
+int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
+    HUF_CElt const* ct = CTable + 1;
+    int bad = 0;
+    int s;
+
+    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
+
+    if (header.maxSymbolValue < maxSymbolValue)
+        return 0;
+
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
+    }
+    return !bad;
+}
+
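Editor's illustration: a hedged sketch of how a caller drives the pipeline above (sort, tree build, height limit, CTable fill) through HUF_buildCTable_wksp and cross-checks the result. The function name is invented; it assumes compilation inside the library, where the internal declarations used above (HUF_CElt, HUF_CTABLE_SIZE_ST, HUF_isError, ...) are visible.

/* Hypothetical driver, mirroring how HUF_compress_internal below calls these helpers. */
static size_t example_buildAndMeasure(const unsigned count[HUF_SYMBOLVALUE_MAX + 1])
{
    HUF_CElt ctable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
    HUF_buildCTable_wksp_tables wksp;   /* scratch tables, sized as defined above */
    size_t const maxNbBits = HUF_buildCTable_wksp(ctable, count, HUF_SYMBOLVALUE_MAX,
                                                  0 /* 0 => HUF_TABLELOG_DEFAULT */,
                                                  &wksp, sizeof(wksp));
    if (HUF_isError(maxNbBits)) return maxNbBits;
    assert(HUF_validateCTable(ctable, count, HUF_SYMBOLVALUE_MAX));   /* every used symbol got a code */
    return HUF_estimateCompressedSize(ctable, count, HUF_SYMBOLVALUE_MAX);   /* projected payload bytes */
}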
+size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
+
+/** HUF_CStream_t:
+ * Huffman uses its own BIT_CStream_t implementation.
+ * There are three major differences from BIT_CStream_t:
+ *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
+ *      the pair (nbBits, value) in the format:
+ *        - Bits [0, 4)            = nbBits
+ *        - Bits [4, 64 - nbBits)  = 0
+ *        - Bits [64 - nbBits, 64) = value
+ *   2. The bitContainer is built from the upper bits and
+ *      right shifted. E.g. to add a new value of N bits
+ *      you right shift the bitContainer by N, then or in
+ *      the new value into the N upper bits.
+ *   3. The bitstream has two bit containers. You can add
+ *      bits to the second container and merge them into
+ *      the first container.
+ */
+
+#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
+
+typedef struct {
+    size_t bitContainer[2];
+    size_t bitPos[2];
+
+    BYTE* startPtr;
+    BYTE* ptr;
+    BYTE* endPtr;
+} HUF_CStream_t;
+
+/**! HUF_initCStream():
+ * Initializes the bitstream.
+ * @returns 0 or an error code.
+ */
+static size_t HUF_initCStream(HUF_CStream_t* bitC,
+                              void* startPtr, size_t dstCapacity)
+{
+    ZSTD_memset(bitC, 0, sizeof(*bitC));
+    bitC->startPtr = (BYTE*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
+    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+/*! HUF_addBits():
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
+ *
+ * @param elt   The element we're adding. This is a (nbBits, value) pair.
+ *              See the HUF_CStream_t docs for the format.
+ * @param idx   Insert into the bitstream at this idx.
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
+ *              to have at least 4 unused bits after this call it may be 1,
+ *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
+ */
+FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
+{
+    assert(idx <= 1);
+    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
+    /* This is efficient on x86-64 with BMI2 because shrx
+     * only reads the low 6 bits of the register. The compiler
+     * knows this and elides the mask. When fast is set,
+     * every operation can use the same value loaded from elt.
+     */
+    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
+    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
+    /* We only read the low 8 bits of bitC->bitPos[idx] so it
+     * doesn't matter that the high bits have noise from the value.
+     */
+    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
+    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+    /* The last 4-bits of elt are dirty if fast is set,
+     * so we must not be overwriting bits that have already been
+     * inserted into the bit container.
+     */
+#if DEBUGLEVEL >= 1
+    {
+        size_t const nbBits = HUF_getNbBits(elt);
+        size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
+        (void)dirtyBits;
+        /* Middle bits are 0. */
+        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
+        /* We didn't overwrite any bits in the bit container. */
+        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+        (void)dirtyBits;
+    }
+#endif
+}
+
+FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
+{
+    bitC->bitContainer[1] = 0;
+    bitC->bitPos[1] = 0;
+}
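Editor's illustration: the right-shift-and-or container discipline from the HUF_CStream_t comment, replayed on a plain uint64_t (standalone and invented for this note; not part of the imported sources):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t container = 0;
    unsigned pos = 0;   /* bits accumulated so far */

    /* Add a 3-bit value 0b101: shift the container right, then or the
     * value into the top 3 bits, mirroring HUF_addBits(). */
    container >>= 3;
    container |= (uint64_t)0x5 << (64 - 3);
    pos += 3;

    /* Add a 2-bit value 0b01 the same way. */
    container >>= 2;
    container |= (uint64_t)0x1 << (64 - 2);
    pos += 2;

    /* The top `pos` bits now read 01|101, newest value on top; a flush that
     * takes the high bits therefore emits values in reverse insertion order,
     * which is consistent with the encoder loops below walking the input
     * back to front. */
    assert(pos == 5);
    assert((container >> (64 - 5)) == 0x0D);   /* 0b01101 */
    printf("top 5 bits: 0x%02X\n", (unsigned)(container >> (64 - 5)));
    return 0;
}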
+/*! HUF_mergeIndex1() :
+ * Merges the bit container @ index 1 into the bit container @ index 0
+ * and zeros the bit container @ index 1.
+ */
+FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
+{
+    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
+    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
+    bitC->bitContainer[0] |= bitC->bitContainer[1];
+    bitC->bitPos[0] += bitC->bitPos[1];
+    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+}
+
+/*! HUF_flushBits() :
+ * Flushes the bits in the bit container @ index 0.
+ *
+ * @post bitPos will be < 8.
+ * @param kFast If kFast is set then we must know a-priori that
+ *              the bit container will not overflow.
+ */
+FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
+{
+    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
+    size_t const nbBits = bitC->bitPos[0] & 0xFF;
+    size_t const nbBytes = nbBits >> 3;
+    /* The top nbBits bits of bitContainer are the ones we need. */
+    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
+    /* Mask bitPos to account for the bytes we consumed. */
+    bitC->bitPos[0] &= 7;
+    assert(nbBits > 0);
+    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitContainer);
+    bitC->ptr += nbBytes;
+    assert(!kFast || bitC->ptr <= bitC->endPtr);
+    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    /* bitContainer doesn't need to be modified because the leftover
+     * bits are already the top bitPos bits. And we don't care about
+     * noise in the lower values.
+     */
+}
+
+/*! HUF_endMark()
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
+ */
+static HUF_CElt HUF_endMark(void)
+{
+    HUF_CElt endMark;
+    HUF_setNbBits(&endMark, 1);
+    HUF_setValue(&endMark, 1);
+    return endMark;
+}
+
+/*! HUF_closeCStream() :
+ * @return Size of CStream, in bytes,
+ *         or 0 if it could not fit into dstBuffer */
+static size_t HUF_closeCStream(HUF_CStream_t* bitC)
+{
+    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
+    HUF_flushBits(bitC, /* kFast */ 0);
+    {
+        size_t const nbBits = bitC->bitPos[0] & 0xFF;
+        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+        return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
+    }
+}
+
+FORCE_INLINE_TEMPLATE void
+HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
+{
+    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
+}
+
+FORCE_INLINE_TEMPLATE void
+HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
+                                   const BYTE* ip, size_t srcSize,
+                                   const HUF_CElt* ct,
+                                   int kUnroll, int kFastFlush, int kLastFast)
+{
+    /* Join to kUnroll */
+    int n = (int)srcSize;
+    int rem = n % kUnroll;
+    if (rem > 0) {
+        for (; rem > 0; --rem) {
+            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
+        }
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n % kUnroll == 0);
+
+    /* Join to 2 * kUnroll */
+    if (n % (2 * kUnroll)) {
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        n -= kUnroll;
+    }
+    assert(n % (2 * kUnroll) == 0);
+
+    for (; n>0; n-= 2 * kUnroll) {
+        /* Encode kUnroll symbols into the bitstream @ index 0. */
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        /* Encode kUnroll symbols into the bitstream @ index 1.
+ * This allows us to start filling the bit container + * without any data dependencies. + */ + HUF_zeroIndex1(bitC); + for (u = 1; u < kUnroll; ++u) { + HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1); + } + HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast); + /* Merge bitstream @ index 1 into the bitstream @ index 0 */ + HUF_mergeIndex1(bitC); + HUF_flushBits(bitC, kFastFlush); + } + assert(n == 0); + +} + +/** + * Returns a tight upper bound on the output space needed by Huffman + * with 8 bytes buffer to handle over-writes. If the output is at least + * this large we don't need to do bounds checks during Huffman encoding. + */ +static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog) +{ + return ((srcSize * tableLog) >> 3) + 8; +} + + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + U32 const tableLog = HUF_readCTableHeader(CTable).tableLog; + HUF_CElt const* ct = CTable + 1; + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + HUF_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { BYTE* op = ostart; + size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } + + if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11) + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0); + else { + if (MEM_32bits()) { + switch (tableLog) { + case 11: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 10: ZSTD_FALLTHROUGH; + case 9: ZSTD_FALLTHROUGH; + case 8: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + case 7: ZSTD_FALLTHROUGH; + default: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + } + } else { + switch (tableLog) { + case 11: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 10: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + case 9: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 8: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 7: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0); + break; + case 6: ZSTD_FALLTHROUGH; + default: + HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1); + break; + } + } + } + assert(bitC.ptr <= bitC.endPtr); + + return HUF_closeCStream(&bitC); +} + +#if DYNAMIC_BMI2 + +static BMI2_TARGET_ATTRIBUTE size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, 
srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int flags) +{ + if (flags & HUF_flags_bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int flags) +{ + (void)flags; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); +} + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int flags) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); + if (cSize == 0 || cSize > 65535) return 0; + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); + if (cSize == 0 || cSize > 65535) return 0; + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) ); + if (cSize == 0 || cSize > 65535) return 0; + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) ); + if (cSize == 0 || cSize > 65535) return 0; + op += cSize; + } + + return (size_t)(op-ostart); +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags); +} + +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? 
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)]; + union { + HUF_buildCTable_wksp_tables buildCTable_wksp; + HUF_WriteCTableWksp writeCTable_wksp; + U32 hist_wksp[HIST_WKSP_SIZE_U32]; + } wksps; +} HUF_compress_tables_t; + +#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 +#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */ + +unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue) +{ + unsigned cardinality = 0; + unsigned i; + + for (i = 0; i < maxSymbolValue + 1; i++) { + if (count[i] != 0) cardinality += 1; + } + + return cardinality; +} + +unsigned HUF_minTableLog(unsigned symbolCardinality) +{ + U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1; + return minBitsSymbols; +} + +unsigned HUF_optimalTableLog( + unsigned maxTableLog, + size_t srcSize, + unsigned maxSymbolValue, + void* workSpace, size_t wkspSize, + HUF_CElt* table, + const unsigned* count, + int flags) +{ + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables)); + + if (!(flags & HUF_flags_optimalDepth)) { + /* cheap evaluation, based on FSE */ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); + } + + { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp); + size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp); + size_t hSize, newSize; + const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue); + const unsigned minTableLog = HUF_minTableLog(symbolCardinality); + size_t optSize = ((size_t) ~0) - 1; + unsigned optLog = maxTableLog, optLogGuess; + + DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize); + + /* Search until size increases */ + for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) { + DEBUGLOG(7, "checking for huffLog=%u", optLogGuess); + + { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize); + if (ERR_isError(maxBits)) continue; + + if (maxBits < optLogGuess && optLogGuess > minTableLog) break; + + hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize); + } + + if (ERR_isError(hSize)) continue; + + newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize; + + if (newSize > optSize + 1) { + break; + } + + if (newSize < optSize) { + optSize = newSize; + optLog = optLogGuess; + } + } + assert(optLog <= HUF_TABLELOG_MAX); + return optLog; + } +} + +/* HUF_compress_internal() : + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, 
ZSTD_ALIGNOF(size_t)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize); + HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE); + + /* checks & inits */ + if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Heuristic : If old table is valid, use it for small inputs */ + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, flags); + } + + /* If uncompressible data is suspected, do a smaller sampling first */ + DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2); + if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) { + size_t largestTotal = 0; + DEBUGLOG(5, "input suspected incompressible : sampling to check"); + { unsigned maxSymbolValueBegin = maxSymbolValue; + CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); + largestTotal += largestBegin; + } + { unsigned maxSymbolValueEnd = maxSymbolValue; + CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) ); + largestTotal += largestEnd; + } + if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1)); + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, flags); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1)); + } + + /* 
Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, + &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, flags); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, flags); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) +{ + DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize); + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, flags); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * consider skipping quickly + * reuse an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int flags) +{ + DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize); + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, flags); +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress.c new file mode 100644 index 0000000..9284e2a --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress.c @@ -0,0 +1,7153 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
+#include "../common/mem.h"
+#include "hist.h"           /* HIST_countFast_wksp */
+#define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
+#include "../common/fse.h"
+#include "../common/huf.h"
+#include "zstd_compress_internal.h"
+#include "zstd_compress_sequences.h"
+#include "zstd_compress_literals.h"
+#include "zstd_fast.h"
+#include "zstd_double_fast.h"
+#include "zstd_lazy.h"
+#include "zstd_opt.h"
+#include "zstd_ldm.h"
+#include "zstd_compress_superblock.h"
+#include "../common/bits.h"      /* ZSTD_highbit32, ZSTD_rotateRight_U64 */
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * COMPRESS_HEAPMODE :
+ * Select how the default compression function ZSTD_compress() allocates its context,
+ * on stack (0, default), or into heap (1).
+ * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
+ */
+#ifndef ZSTD_COMPRESS_HEAPMODE
+# define ZSTD_COMPRESS_HEAPMODE 0
+#endif
+
+/*!
+ * ZSTD_HASHLOG3_MAX :
+ * Maximum size of the hash table dedicated to find 3-bytes matches,
+ * in log format, aka 17 => 1 << 17 == 128Ki positions.
+ * This structure is only used in zstd_opt.
+ * Since allocation is centralized for all strategies, it has to be known here.
+ * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,
+ * so that zstd_opt.c doesn't need to know about this constant.
+ */
+#ifndef ZSTD_HASHLOG3_MAX
+# define ZSTD_HASHLOG3_MAX 17
+#endif
+
+/*-*************************************
+*  Helper functions
+***************************************/
+/* ZSTD_compressBound()
+ * Note that the result from this function is only valid for
+ * the one-pass compression functions.
+ * When employing the streaming mode,
+ * if flushes are frequently altering the size of blocks,
+ * the overhead of block headers can make the compressed data larger
+ * than the return value of ZSTD_compressBound().
+ */
+size_t ZSTD_compressBound(size_t srcSize) {
+    size_t const r = ZSTD_COMPRESSBOUND(srcSize);
+    if (r==0) return ERROR(srcSize_wrong);
+    return r;
+}
+
+
+/*-*************************************
+*  Context memory management
+***************************************/
+struct ZSTD_CDict_s {
+    const void* dictContent;
+    size_t dictContentSize;
+    ZSTD_dictContentType_e dictContentType;  /* The dictContentType the CDict was created with */
+    U32* entropyWorkspace;  /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
+    ZSTD_cwksp workspace;
+    ZSTD_matchState_t matchState;
+    ZSTD_compressedBlockState_t cBlockState;
+    ZSTD_customMem customMem;
+    U32 dictID;
+    int compressionLevel;  /* 0 indicates that advanced API was used to select CDict params */
+    ZSTD_paramSwitch_e useRowMatchFinder;  /* Indicates whether the CDict was created with params that would use
+                                            * row-based matchfinder. Unless the cdict is reloaded, we will use
+                                            * the same greedy/lazy matchfinder at compression time.
+ */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + ZSTD_memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuSupportsBmi2(); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +/** + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx); + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) ZSTD_customFree(cctx, cctx->customMem); + } + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void)cctx; + return 0; +#endif +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +/* Returns true if the strategy supports using a row based matchfinder */ +static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { + return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); +} + +/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder + * for this compression. + */ +static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) { + assert(mode != ZSTD_ps_auto); + return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable); +} + +/* Returns row matchfinder usage given an initial mode and cParams */ +static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { +#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON) + int const kHasSIMD128 = 1; +#else + int const kHasSIMD128 = 0; +#endif + if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ + mode = ZSTD_ps_disable; + if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; + if (kHasSIMD128) { + if (cParams->windowLog > 14) mode = ZSTD_ps_enable; + } else { + if (cParams->windowLog > 17) mode = ZSTD_ps_enable; + } + return mode; +} + +/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */ +static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { + if (mode != ZSTD_ps_auto) return mode; + return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? 
ZSTD_ps_enable : ZSTD_ps_disable; +} + +/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ +static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, + const ZSTD_paramSwitch_e useRowMatchFinder, + const U32 forDDSDict) { + assert(useRowMatchFinder != ZSTD_ps_auto); + /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. + * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. + */ + return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); +} + +/* Returns ZSTD_ps_enable if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns ZSTD_ps_disable otherwise. + */ +static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode, + const ZSTD_compressionParameters* const cParams) { + if (mode != ZSTD_ps_auto) return mode; + return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable; +} + +static int ZSTD_resolveExternalSequenceValidation(int mode) { + return mode; +} + +/* Resolves maxBlockSize to the default if no value is present. */ +static size_t ZSTD_resolveMaxBlockSize(size_t maxBlockSize) { + if (maxBlockSize == 0) { + return ZSTD_BLOCKSIZE_MAX; + } else { + return maxBlockSize; + } +} + +static ZSTD_paramSwitch_e ZSTD_resolveExternalRepcodeSearch(ZSTD_paramSwitch_e value, int cLevel) { + if (value != ZSTD_ps_auto) return value; + if (cLevel < 10) { + return ZSTD_ps_disable; + } else { + return ZSTD_ps_enable; + } +} + +/* Returns 1 if compression parameters are such that CDict hashtable and chaintable indices are tagged. + * If so, the tags need to be removed in ZSTD_resetCCtx_byCopyingCDict. 
*/ +static int ZSTD_CDictIndicesAreTagged(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy == ZSTD_fast || cParams->strategy == ZSTD_dfast; +} + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); + cctxParams.cParams = cParams; + + /* Adjust advanced params according to cParams */ + cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams); + if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) { + ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); + assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); + assert(cctxParams.ldmParams.hashRateLog < 32); + } + cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams); + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + cctxParams.validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams.validateSequences); + cctxParams.maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams.maxBlockSize); + cctxParams.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams.searchForExternalRepcodes, + cctxParams.compressionLevel); + assert(!ZSTD_checkCParams(cParams)); + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_customCalloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); + params->customMem = customMem; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_customFree(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes `cctxParams` from `params` and `compressionLevel`. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. + */ +static void +ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, + const ZSTD_parameters* params, + int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. 
+ */
+    cctxParams->compressionLevel = compressionLevel;
+    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
+    cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
+    cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
+    cctxParams->validateSequences = ZSTD_resolveExternalSequenceValidation(cctxParams->validateSequences);
+    cctxParams->maxBlockSize = ZSTD_resolveMaxBlockSize(cctxParams->maxBlockSize);
+    cctxParams->searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(cctxParams->searchForExternalRepcodes, compressionLevel);
+    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
+             cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
+}
+
+size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
+{
+    RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!");
+    FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , "");
+    ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL);
+    return 0;
+}
+
+/**
+ * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone.
+ * @param params Validated zstd parameters.
+ */
+static void ZSTD_CCtxParams_setZstdParams(
+        ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params)
+{
+    assert(!ZSTD_checkCParams(params->cParams));
+    cctxParams->cParams = params->cParams;
+    cctxParams->fParams = params->fParams;
+    /* Should not matter, as all cParams are presumed properly defined.
+     * But, set it for tracing anyway.
+     */
+    cctxParams->compressionLevel = ZSTD_NO_CLEVEL;
+}
+
+ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
+{
+    ZSTD_bounds bounds = { 0, 0, 0 };
+
+    switch(param)
+    {
+    case ZSTD_c_compressionLevel:
+        bounds.lowerBound = ZSTD_minCLevel();
+        bounds.upperBound = ZSTD_maxCLevel();
+        return bounds;
+
+    case ZSTD_c_windowLog:
+        bounds.lowerBound = ZSTD_WINDOWLOG_MIN;
+        bounds.upperBound = ZSTD_WINDOWLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_hashLog:
+        bounds.lowerBound = ZSTD_HASHLOG_MIN;
+        bounds.upperBound = ZSTD_HASHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_chainLog:
+        bounds.lowerBound = ZSTD_CHAINLOG_MIN;
+        bounds.upperBound = ZSTD_CHAINLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_searchLog:
+        bounds.lowerBound = ZSTD_SEARCHLOG_MIN;
+        bounds.upperBound = ZSTD_SEARCHLOG_MAX;
+        return bounds;
+
+    case ZSTD_c_minMatch:
+        bounds.lowerBound = ZSTD_MINMATCH_MIN;
+        bounds.upperBound = ZSTD_MINMATCH_MAX;
+        return bounds;
+
+    case ZSTD_c_targetLength:
+        bounds.lowerBound = ZSTD_TARGETLENGTH_MIN;
+        bounds.upperBound = ZSTD_TARGETLENGTH_MAX;
+        return bounds;
+
+    case ZSTD_c_strategy:
+        bounds.lowerBound = ZSTD_STRATEGY_MIN;
+        bounds.upperBound = ZSTD_STRATEGY_MAX;
+        return bounds;
+
+    case ZSTD_c_contentSizeFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_checksumFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_dictIDFlag:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
+    case ZSTD_c_nbWorkers:
+        bounds.lowerBound = 0;
+#ifdef ZSTD_MULTITHREAD
+        bounds.upperBound = ZSTDMT_NBWORKERS_MAX;
+#else
+        bounds.upperBound = 0;
+#endif
+        return bounds;
+
+    case ZSTD_c_jobSize:
+        bounds.lowerBound = 0;
+#ifdef ZSTD_MULTITHREAD
+        bounds.upperBound = ZSTDMT_JOBSIZE_MAX;
+#else
+        bounds.upperBound = 0;
+#endif
+        return bounds;
+
+    case ZSTD_c_overlapLog:
+#ifdef ZSTD_MULTITHREAD + bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; + bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable); + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_useBlockSplitter: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_useRowMatchFinder: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_deterministicRefPrefix: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_prefetchCDictTables: + bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + case ZSTD_c_enableSeqProducerFallback: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_maxBlockSize: + bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_searchForExternalRepcodes: + 
bounds.lowerBound = (int)ZSTD_ps_auto; + bounds.upperBound = (int)ZSTD_ps_disable; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } +} + +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. + */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) \ + do { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ + } while (0) + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + case ZSTD_c_useBlockSplitter: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: + case ZSTD_c_prefetchCDictTables: + case ZSTD_c_enableSeqProducerFallback: + case ZSTD_c_maxBlockSize: + case ZSTD_c_searchForExternalRepcodes: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in cctx init stage"); + } } + + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; + + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + case ZSTD_c_useBlockSplitter: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: + case 
ZSTD_c_prefetchCDictTables: + case ZSTD_c_enableSeqProducerFallback: + case ZSTD_c_maxBlockSize: + case ZSTD_c_searchForExternalRepcodes: + break; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else + CCtxParams->compressionLevel = value; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = (U32)value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = (U32)value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return (size_t)CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return (size_t)CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return (size_t)CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict : { + const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, (int)pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } + + case ZSTD_c_literalCompressionMode : { + const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value; + 
BOUNDCHECK(ZSTD_c_literalCompressionMode, (int)lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + CCtxParams->nbWorkers = value; + return (size_t)(CCtxParams->nbWorkers); +#endif + + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + /* Adjust to the minimum non-default value. */ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; +#endif + + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->overlapLog = value; + return (size_t)CCtxParams->overlapLog; +#endif + + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->rsyncable = value; + return (size_t)CCtxParams->rsyncable; +#endif + + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return (size_t)CCtxParams->enableDedicatedDictSearch; + + case ZSTD_c_enableLongDistanceMatching : + BOUNDCHECK(ZSTD_c_enableLongDistanceMatching, value); + CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value; + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = (U32)value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = (U32)value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = (U32)value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmHashRateLog, value); + CCtxParams->ldmParams.hashRateLog = (U32)value; + return CCtxParams->ldmParams.hashRateLog; + + case ZSTD_c_targetCBlockSize : + if (value!=0) { /* 0 ==> default */ + value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN); + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + } + CCtxParams->targetCBlockSize = (U32)value; + return CCtxParams->targetCBlockSize; + + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return (size_t)CCtxParams->srcSizeHint; + + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + 
BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return (size_t)CCtxParams->validateSequences; + + case ZSTD_c_useBlockSplitter: + BOUNDCHECK(ZSTD_c_useBlockSplitter, value); + CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value; + return CCtxParams->useBlockSplitter; + + case ZSTD_c_useRowMatchFinder: + BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); + CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value; + return CCtxParams->useRowMatchFinder; + + case ZSTD_c_deterministicRefPrefix: + BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); + CCtxParams->deterministicRefPrefix = !!value; + return (size_t)CCtxParams->deterministicRefPrefix; + + case ZSTD_c_prefetchCDictTables: + BOUNDCHECK(ZSTD_c_prefetchCDictTables, value); + CCtxParams->prefetchCDictTables = (ZSTD_paramSwitch_e)value; + return CCtxParams->prefetchCDictTables; + + case ZSTD_c_enableSeqProducerFallback: + BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value); + CCtxParams->enableMatchFinderFallback = value; + return (size_t)CCtxParams->enableMatchFinderFallback; + + case ZSTD_c_maxBlockSize: + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_maxBlockSize, value); + CCtxParams->maxBlockSize = value; + return CCtxParams->maxBlockSize; + + case ZSTD_c_searchForExternalRepcodes: + BOUNDCHECK(ZSTD_c_searchForExternalRepcodes, value); + CCtxParams->searchForExternalRepcodes = (ZSTD_paramSwitch_e)value; + return CCtxParams->searchForExternalRepcodes; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) +{ + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + 
assert(CCtxParams->jobSize <= INT_MAX); + *value = (int)CCtxParams->jobSize; + break; +#endif + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->overlapLog; + break; +#endif + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->rsyncable; + break; +#endif + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; + case ZSTD_c_useBlockSplitter : + *value = (int)CCtxParams->useBlockSplitter; + break; + case ZSTD_c_useRowMatchFinder : + *value = (int)CCtxParams->useRowMatchFinder; + break; + case ZSTD_c_deterministicRefPrefix: + *value = (int)CCtxParams->deterministicRefPrefix; + break; + case ZSTD_c_prefetchCDictTables: + *value = (int)CCtxParams->prefetchCDictTables; + break; + case ZSTD_c_enableSeqProducerFallback: + *value = CCtxParams->enableMatchFinderFallback; + break; + case ZSTD_c_maxBlockSize: + *value = (int)CCtxParams->maxBlockSize; + break; + case ZSTD_c_searchForExternalRepcodes: + *value = (int)CCtxParams->searchForExternalRepcodes; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. 
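+ *
+ *  Illustrative usage sketch (hypothetical caller code, not part of this file;
+ *  it relies only on the public helpers defined above, error checks omitted):
+ *      ZSTD_CCtx_params* const cctxParams = ZSTD_createCCtxParams();
+ *      ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_compressionLevel, 19);
+ *      ZSTD_CCtxParams_setParameter(cctxParams, ZSTD_c_checksumFlag, 1);
+ *      ZSTD_CCtx_setParametersUsingCCtxParams(cctx, cctxParams);  (merely stored)
+ *      ZSTD_freeCCtxParams(cctxParams);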
+ */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); + + cctx->requestedParams = *params; + return 0; +} + +size_t ZSTD_CCtx_setCParams(ZSTD_CCtx* cctx, ZSTD_compressionParameters cparams) +{ + ZSTD_STATIC_ASSERT(sizeof(cparams) == 7 * 4 /* all params are listed below */); + DEBUGLOG(4, "ZSTD_CCtx_setCParams"); + /* only update if all parameters are valid */ + FORWARD_IF_ERROR(ZSTD_checkCParams(cparams), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, cparams.windowLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_chainLog, cparams.chainLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_hashLog, cparams.hashLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_searchLog, cparams.searchLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_minMatch, cparams.minMatch), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_targetLength, cparams.targetLength), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, cparams.strategy), ""); + return 0; +} + +size_t ZSTD_CCtx_setFParams(ZSTD_CCtx* cctx, ZSTD_frameParameters fparams) +{ + ZSTD_STATIC_ASSERT(sizeof(fparams) == 3 * 4 /* all params are listed below */); + DEBUGLOG(4, "ZSTD_CCtx_setFParams"); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, fparams.contentSizeFlag != 0), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, fparams.checksumFlag != 0), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(cctx, ZSTD_c_dictIDFlag, fparams.noDictIDFlag == 0), ""); + return 0; +} + +size_t ZSTD_CCtx_setParams(ZSTD_CCtx* cctx, ZSTD_parameters params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParams"); + /* First check cParams, because we want to update all or none. */ + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + /* Next set fParams, because this could fail if the cctx isn't in init stage. */ + FORWARD_IF_ERROR(ZSTD_CCtx_setFParams(cctx, params.fParams), ""); + /* Finally set cParams, which should succeed. */ + FORWARD_IF_ERROR(ZSTD_CCtx_setCParams(cctx, params.cParams), ""); + return 0; +} + +size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %llu bytes", pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + const ZSTD_compressionParameters* cParams); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); + +/** + * Initializes the local dictionary using requested parameters. + * NOTE: Initialization does not employ the pledged src size, + * because the dictionary may be used for multiple compressions. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + if (dl->dict == NULL) { + /* No local dictionary. 
*/ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + /* Local dictionary already initialized. */ + assert(cctx->cdict == dl->cdict); + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced2( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + &cctx->requestedParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when cctx is not in init stage."); + ZSTD_clearAllDicts(cctx); /* erase any previously set dictionary */ + if (dict == NULL || dictSize == 0) /* no dictionary */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + /* copy dictionary content inside CCtx to own its lifetime */ + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "static CCtx can't allocate for an internal copy of dictionary"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(dictBuffer==NULL, memory_allocation, + "allocation failed for dictionary content"); + ZSTD_memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; /* owned ptr to free */ + cctx->localDict.dict = dictBuffer; /* read-only reference */ + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} + +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} + +/*! 
ZSTD_CCtx_reset() :
+ *  Also dumps dictionary */
+size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
+{
+    if ( (reset == ZSTD_reset_session_only)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        cctx->streamStage = zcss_init;
+        cctx->pledgedSrcSizePlusOne = 0;
+    }
+    if ( (reset == ZSTD_reset_parameters)
+      || (reset == ZSTD_reset_session_and_parameters) ) {
+        RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
+                        "Reset parameters is only possible during init stage.");
+        ZSTD_clearAllDicts(cctx);
+        return ZSTD_CCtxParams_reset(&cctx->requestedParams);
+    }
+    return 0;
+}
+
+
+/** ZSTD_checkCParams() :
+    control CParam values remain within authorized range.
+    @return : 0, or an error code if one value is beyond authorized range */
+size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
+{
+    BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog);
+    BOUNDCHECK(ZSTD_c_chainLog,  (int)cParams.chainLog);
+    BOUNDCHECK(ZSTD_c_hashLog,   (int)cParams.hashLog);
+    BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog);
+    BOUNDCHECK(ZSTD_c_minMatch,  (int)cParams.minMatch);
+    BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength);
+    BOUNDCHECK(ZSTD_c_strategy,  cParams.strategy);
+    return 0;
+}
+
+/** ZSTD_clampCParams() :
+ *  make CParam values within valid range.
+ *  @return : valid CParams */
+static ZSTD_compressionParameters
+ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+{
+#   define CLAMP_TYPE(cParam, val, type)                                      \
+        do {                                                                  \
+            ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+            if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+            else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+        } while (0)
+#   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
+    CLAMP(ZSTD_c_windowLog, cParams.windowLog);
+    CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
+    CLAMP(ZSTD_c_hashLog,   cParams.hashLog);
+    CLAMP(ZSTD_c_searchLog, cParams.searchLog);
+    CLAMP(ZSTD_c_minMatch,  cParams.minMatch);
+    CLAMP(ZSTD_c_targetLength,cParams.targetLength);
+    CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy);
+    return cParams;
+}
+
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
+{
+    U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
+    return hashLog - btScale;
+}
+
+/** ZSTD_dictAndWindowLog() :
+ * Returns an adjusted window log that is large enough to fit the source and the dictionary.
+ * The zstd format says that the entire dictionary is valid if one byte of the dictionary
+ * is within the window. So the hashLog and chainLog should be large enough to reference both
+ * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing
+ * the hashLog and windowLog.
+ * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN.
+ */
+static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize)
+{
+    const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX;
+    /* No dictionary ==> No change */
+    if (dictSize == 0) {
+        return windowLog;
+    }
+    assert(windowLog <= ZSTD_WINDOWLOG_MAX);
+    assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */
+    {
+        U64 const windowSize = 1ULL << windowLog;
+        U64 const dictAndWindowSize = dictSize + windowSize;
+        /* If the window size is already large enough to fit both the source and the dictionary
+         * then just use the window size. Otherwise adjust so that it fits the dictionary and
+         * the window.
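+         * Worked example (illustrative numbers only): windowLog=20 (1 MiB window),
+         * dictSize=512 KiB, srcSize=2 MiB. The window alone cannot hold dict+src,
+         * and dictAndWindowSize = 1.5 MiB stays below maxWindowSize, so the
+         * adjusted log is ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1 = 21.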
+ */ + if (windowSize >= dictSize + srcSize) { + return windowLog; /* Window size large enough already */ + } else if (dictAndWindowSize >= maxWindowSize) { + return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ + } else { + return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + } + } +} + +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. + * note : `srcSize==0` means 0! + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize, + ZSTD_cParamMode_e mode, + ZSTD_paramSwitch_e useRowMatchFinder) +{ + const U64 minSrcSize = 513; /* (1<<9) + 1 */ + const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + /* Cascade the selected strategy down to the next-highest one built into + * this binary. */ +#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_btultra2) { + cPar.strategy = ZSTD_btultra; + } + if (cPar.strategy == ZSTD_btultra) { + cPar.strategy = ZSTD_btopt; + } +#endif +#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_btopt) { + cPar.strategy = ZSTD_btlazy2; + } +#endif +#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_btlazy2) { + cPar.strategy = ZSTD_lazy2; + } +#endif +#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_lazy2) { + cPar.strategy = ZSTD_lazy; + } +#endif +#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_lazy) { + cPar.strategy = ZSTD_greedy; + } +#endif +#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_greedy) { + cPar.strategy = ZSTD_dfast; + } +#endif +#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + if (cPar.strategy == ZSTD_dfast) { + cPar.strategy = ZSTD_fast; + cPar.targetLength = 0; + } +#endif + + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * assumptions about it. We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; + case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unknown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + break; + case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ + dictSize = 0; + break; + default: + assert(0); + break; + } + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize <= maxWindowResize) + && (dictSize <= maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + /* We can't use more than 32 bits of hash in total, so that means that we require: + * (hashLog + 8) <= 32 && (chainLog + 8) <= 32 + */ + if (mode == ZSTD_cpm_createCDict && ZSTD_CDictIndicesAreTagged(&cPar)) { + U32 const maxShortCacheHashLog = 32 - ZSTD_SHORT_CACHE_TAG_BITS; + if (cPar.hashLog > maxShortCacheHashLog) { + cPar.hashLog = maxShortCacheHashLog; + } + if (cPar.chainLog > maxShortCacheHashLog) { + cPar.chainLog = maxShortCacheHashLog; + } + } + + + /* At this point, we aren't 100% sure if we are using the row match finder. + * Unless it is explicitly disabled, conservatively assume that it is enabled. + * In this case it will only be disabled for small sources, so shrinking the + * hash log a little bit shouldn't result in any ratio loss. + */ + if (useRowMatchFinder == ZSTD_ps_auto) + useRowMatchFinder = ZSTD_ps_enable; + + /* We can't hash more than 32-bits in total. So that means that we require: + * (hashLog - rowLog + 8) <= 32 + */ + if (ZSTD_rowMatchFinderUsed(cPar.strategy, useRowMatchFinder)) { + /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = BOUNDED(4, cPar.searchLog, 6); + U32 const maxRowHashLog = 32 - ZSTD_ROW_HASH_TAG_BITS; + U32 const maxHashLog = maxRowHashLog + rowLog; + assert(cPar.hashLog >= rowLog); + if (cPar.hashLog > maxHashLog) { + cPar.hashLog = maxHashLog; + } + } + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown, ZSTD_ps_auto); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == 
ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) {
+        srcSizeHint = CCtxParams->srcSizeHint;
+    }
+    cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
+    if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+    ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
+    assert(!ZSTD_checkCParams(cParams));
+    /* srcSizeHint == 0 means 0 */
+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode, CCtxParams->useRowMatchFinder);
+}
+
+static size_t
+ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const ZSTD_paramSwitch_e useRowMatchFinder,
+                       const U32 enableDedicatedDictSearch,
+                       const U32 forCCtx)
+{
+    /* chain table size should be 0 for fast or row-hash strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
+                                 ? ((size_t)1 << cParams->chainLog)
+                                 : 0;
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+    /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't
+     * surrounded by redzones in ASAN. */
+    size_t const tableSpace = chainSize * sizeof(U32)
+                            + hSize * sizeof(U32)
+                            + h3Size * sizeof(U32);
+    size_t const optPotentialSpace =
+        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
+    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+                                            ? ZSTD_cwksp_aligned_alloc_size(hSize)
+                                            : 0;
+    size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
+                                ? optPotentialSpace
+                                : 0;
+    size_t const slackSpace = ZSTD_cwksp_slack_space_required();
+
+    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
+    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
+    assert(useRowMatchFinder != ZSTD_ps_auto);
+
+    DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
+                (U32)chainSize, (U32)hSize, (U32)h3Size);
+    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
+}
+
+/* Helper function for calculating memory requirements.
+ * Gives a tighter bound than ZSTD_sequenceBound() by taking minMatch into account. */
+static size_t ZSTD_maxNbSeq(size_t blockSize, unsigned minMatch, int useSequenceProducer) {
+    U32 const divider = (minMatch==3 || useSequenceProducer) ?
3 : 4; + return blockSize / divider; +} + +static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( + const ZSTD_compressionParameters* cParams, + const ldmParams_t* ldmParams, + const int isStatic, + const ZSTD_paramSwitch_e useRowMatchFinder, + const size_t buffInSize, + const size_t buffOutSize, + const U64 pledgedSrcSize, + int useSequenceProducer, + size_t maxBlockSize) +{ + size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize); + size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(maxBlockSize), windowSize); + size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, cParams->minMatch, useSequenceProducer); + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); + size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ? + ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + + + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + + ZSTD_cwksp_alloc_size(buffOutSize); + + size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); + size_t const externalSeqSpace = useSequenceProducer + ? ZSTD_cwksp_aligned_alloc_size(maxNbExternalSeq * sizeof(ZSTD_Sequence)) + : 0; + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace + + externalSeqSpace; + + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, + &cParams); + + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. 
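+     * Illustrative sizing sketch (hypothetical caller code, not part of this
+     * file; it pairs the one-shot estimate with a static CCtx, error checks
+     * and includes omitted):
+     *     size_t const budget = ZSTD_estimateCCtxSize_usingCCtxParams(params);
+     *     void* const wksp = malloc(budget);
+     *     ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, budget);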
*/
+    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
+}
+
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_ps_disable;
+        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_ps_enable;
+        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+    }
+}
+
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
+}
+
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
+        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+            ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+        size_t const blockSize = MIN(ZSTD_resolveMaxBlockSize(params->maxBlockSize), (size_t)1 << cParams.windowLog);
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ?
ZSTD_compressBound(blockSize) + 1
+                : 0;
+        ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
+
+        return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+            &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
+            ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
+    }
+}
+
+size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_ps_disable;
+        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_ps_enable;
+        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+    }
+}
+
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
+{
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    return ZSTD_estimateCStreamSize_usingCParams(cParams);
+}
+
+size_t ZSTD_estimateCStreamSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        size_t const newMB = ZSTD_estimateCStreamSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+/* ZSTD_getFrameProgression():
+ * tells how much data has been consumed (input) and produced (output) for current frame.
+ * able to count progression inside worker threads (non-blocking mode).
+ */
+ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx)
+{
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->appliedParams.nbWorkers > 0) {
+        return ZSTDMT_getFrameProgression(cctx->mtctx);
+    }
+#endif
+    {   ZSTD_frameProgression fp;
+        size_t const buffered = (cctx->inBuff == NULL) ? 0 :
+                                cctx->inBuffPos - cctx->inToCompress;
+        if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress);
+        assert(buffered <= ZSTD_BLOCKSIZE_MAX);
+        fp.ingested = cctx->consumedSrcSize + buffered;
+        fp.consumed = cctx->consumedSrcSize;
+        fp.produced = cctx->producedCSize;
+        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
+        fp.currentJobID = 0;
+        fp.nbActiveWorkers = 0;
+        return fp;
+}   }
+
+/*! ZSTD_toFlushNow()
+ *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
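+ *
+ *  Illustrative polling sketch (hypothetical caller code, not part of this
+ *  file; it uses only ZSTD_getFrameProgression() and ZSTD_toFlushNow() above):
+ *      ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
+ *      size_t const pending = ZSTD_toFlushNow(cctx);
+ *      printf("ingested=%llu, produced=%llu, still to flush=%zu\n",
+ *             fp.ingested, fp.produced, pending);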
+ */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_toFlushNow(cctx->mtctx); + } +#endif + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} + +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). 
+ */
+typedef enum {
+    ZSTDirp_continue,
+    ZSTDirp_reset
+} ZSTD_indexResetPolicy_e;
+
+typedef enum {
+    ZSTD_resetTarget_CDict,
+    ZSTD_resetTarget_CCtx
+} ZSTD_resetTarget_e;
+
+/* Mixes the bits of a 64-bit value, based on XXH3_rrmxmx */
+static U64 ZSTD_bitmix(U64 val, U64 len) {
+    val ^= ZSTD_rotateRight_U64(val, 49) ^ ZSTD_rotateRight_U64(val, 24);
+    val *= 0x9FB21C651E98DF25ULL;
+    val ^= (val >> 35) + len;
+    val *= 0x9FB21C651E98DF25ULL;
+    return val ^ (val >> 28);
+}
+
+/* Mixes in the hashSalt and hashSaltEntropy to create a new hashSalt */
+static void ZSTD_advanceHashSalt(ZSTD_matchState_t* ms) {
+    ms->hashSalt = ZSTD_bitmix(ms->hashSalt, 8) ^ ZSTD_bitmix((U64) ms->hashSaltEntropy, 4);
+}
+
+static size_t
+ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+                      ZSTD_cwksp* ws,
+                const ZSTD_compressionParameters* cParams,
+                const ZSTD_paramSwitch_e useRowMatchFinder,
+                const ZSTD_compResetPolicy_e crp,
+                const ZSTD_indexResetPolicy_e forceResetIndex,
+                const ZSTD_resetTarget_e forWho)
+{
+    /* disable chain table allocation for fast or row-based strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
+                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
+                                ? ((size_t)1 << cParams->chainLog)
+                                : 0;
+    size_t const hSize = ((size_t)1) << cParams->hashLog;
+    U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
+    size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
+
+    DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    assert(useRowMatchFinder != ZSTD_ps_auto);
+    if (forceResetIndex == ZSTDirp_reset) {
+        ZSTD_window_init(&ms->window);
+        ZSTD_cwksp_mark_tables_dirty(ws);
+    }
+
+    ms->hashLog3 = hashLog3;
+    ms->lazySkipping = 0;
+
+    ZSTD_invalidateMatchState(ms);
+
+    assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */
+
+    ZSTD_cwksp_clear_tables(ws);
+
+    DEBUGLOG(5, "reserving table space");
+    /* table Space */
+    ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32));
+    ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32));
+    ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32));
+    RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
+                    "failed a workspace allocation in ZSTD_reset_matchState");
+
+    DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty);
+    if (crp!=ZSTDcrp_leaveDirty) {
+        /* reset tables only */
+        ZSTD_cwksp_clean_tables(ws);
+    }
+
+    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+        /* Row match finder needs an additional table of hashes ("tags") */
+        size_t const tagTableSize = hSize;
+        /* We want to generate a new salt in case we reset a Cctx, but we always want to use
+         * 0 when we reset a Cdict */
+        if(forWho == ZSTD_resetTarget_CCtx) {
+            ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned_init_once(ws, tagTableSize);
+            ZSTD_advanceHashSalt(ms);
+        } else {
+            /* When we are not salting we want to always memset the memory */
+            ms->tagTable = (BYTE*) ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+            ZSTD_memset(ms->tagTable, 0, tagTableSize);
+            ms->hashSalt = 0;
+        }
+        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
+            U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
+            assert(cParams->hashLog >= rowLog);
+            ms->rowHashLog = cParams->hashLog - rowLog;
+        }
+    }
+
+    /* opt parser space */
+    if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) {
+        DEBUGLOG(4,
"reserving optimal parser space"); + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t)); + } + + ms->cParams = *cParams; + + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + return 0; +} + +/* ZSTD_indexTooCloseToMax() : + * minor optimization : prefer memset() rather than reduceIndex() + * which is measurably slow in some circumstances (reported for Visual Studio). + * Works when re-using a context for a lot of smallish inputs : + * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, + * memset() will be triggered before reduceIndex(). + */ +#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) +static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) +{ + return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + +/** ZSTD_dictTooBig(): + * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in + * one go generically. So we ensure that in that case we reset the tables to zero, + * so that we can load as much of the dictionary as possible. + */ +static int ZSTD_dictTooBig(size_t const loadedDictSize) +{ + return loadedDictSize > ZSTD_CHUNKSIZE_MAX; +} + +/*! ZSTD_resetCCtx_internal() : + * @param loadedDictSize The size of the dictionary to be loaded + * into the context, if any. If no dictionary is used, or the + * dictionary is being attached / copied, then pass 0. + * note : `params` are assumed fully validated at this stage. + */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params const* params, + U64 const pledgedSrcSize, + size_t const loadedDictSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + ZSTD_cwksp* const ws = &zc->workspace; + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d", + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + + zc->isFirstBlock = 1; + + /* Set applied params early so we can modify them for LDM, + * and point params at the applied params. 
+ */
+    zc->appliedParams = *params;
+    params = &zc->appliedParams;
+
+    assert(params->useRowMatchFinder != ZSTD_ps_auto);
+    assert(params->useBlockSplitter != ZSTD_ps_auto);
+    assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
+    assert(params->maxBlockSize != 0);
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
+        /* Adjust long distance matching parameters */
+        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
+        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
+        assert(params->ldmParams.hashRateLog < 32);
+    }
+
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
+        size_t const blockSize = MIN(params->maxBlockSize, windowSize);
+        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
+                ? ZSTD_compressBound(blockSize) + 1
+                : 0;
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
+                ? windowSize + blockSize
+                : 0;
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
+
+        int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
+        ZSTD_indexResetPolicy_e needsIndexReset =
+            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
+
+        size_t const neededSpace =
+            ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
+                buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
+
+        FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
+
+        if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
+
+        {   /* Check if workspace is large enough, alloc a new one if needed */
+            int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
+            int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
+            int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
+            DEBUGLOG(4, "Need %zu B workspace", neededSpace);
+            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+
+            if (resizeWorkspace) {
+                DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
+                            ZSTD_cwksp_sizeof(ws) >> 10,
+                            neededSpace >> 10);
+
+                RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize");
+
+                needsIndexReset = ZSTDirp_reset;
+
+                ZSTD_cwksp_free(ws, zc->customMem);
+                FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), "");
+
+                DEBUGLOG(5, "reserving object space");
+                /* Statically sized space.
+ * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + } } + + ZSTD_cwksp_clear(ws); + + /* init params */ + zc->blockState.matchState.cParams = params->cParams; + zc->blockState.matchState.prefetchCDictTables = params->prefetchCDictTables == ZSTD_ps_enable; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->dictContentSize = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + &params->cParams, + params->useRowMatchFinder, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx), ""); + + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + /* ldm hash table */ + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { + /* TODO: avoid memset? */ + size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); + zc->maxNbLdmSequences = maxNbLdmSeq; + + ZSTD_window_init(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; + } + + /* reserve space for block-level external sequences */ + if (ZSTD_hasExtSeqProd(params)) { + size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize); + zc->extSeqBufCapacity = maxNbExternalSeq; + zc->extSeqBuf = + (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence)); + } + + /* buffers */ + + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. + */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; + + zc->bufferedPolicy = zbuff; + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + + /* ldm bucketOffsets table */ + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { + /* TODO: avoid memset?
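+ * The zeroing matters for correctness today : bucketOffsets entries are + * one byte each and a fresh table must start at offset 0 in every bucket. + * As a size illustration (hypothetical numbers) : hashLog=20 with + * bucketSizeLog=4 gives numBuckets = 1<<16, i.e. a 64 KB memset per reset.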
*/ + size_t const numBuckets = + ((size_t)1) << (params->ldmParams.hashLog - + params->ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); + } + + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace)); + + zc->initialized = 1; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; i<ZSTD_REP_NUM; i++) cctx->blockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +/* These are the approximate sizes for each strategy past which copying the + * dictionary tables into the working context is faster than using them + * in-place. + */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; + +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ +} + +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgedSrcSize == 0 means 0!
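+ * (an explicit 0 is a genuine size pledge for an empty input, not "unknown"). + * Worked example (hypothetical numbers) : attaching a cdict built with + * windowLog=23 to a 4 KB pledged input lets ZSTD_adjustCParams_internal() + * below shrink the working context's table logs to fit the input, while + * the attached cdict keeps serving matches from its own full-size tables.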
*/ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict, + params.useRowMatchFinder); + params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); + } + + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. */ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +static void ZSTD_copyCDictTableIntoCCtx(U32* dst, U32 const* src, size_t tableSize, + ZSTD_compressionParameters const* cParams) { + if (ZSTD_CDictIndicesAreTagged(cParams)){ + /* Remove tags from the CDict table if they are present. + * See docs on "short cache" in zstd_compress_internal.h for context. */ + size_t i; + for (i = 0; i < tableSize; i++) { + U32 const taggedIndex = src[i]; + U32 const index = taggedIndex >> ZSTD_SHORT_CACHE_TAG_BITS; + dst[i] = index; + } + } else { + ZSTD_memcpy(dst, src, tableSize * sizeof(U32)); + } +} + +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + assert(!cdict->matchState.dedicatedDictSearch); + DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables.
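+ * In copy mode the cctx's tables must be bit-compatible with the cdict's, + * so strategy, hashLog and chainLog are taken verbatim from the cdict + * (the asserts after the reset below re-check exactly this); only windowLog + * is kept from the caller's request.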
*/ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } + + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + assert(params.useRowMatchFinder != ZSTD_ps_auto); + + /* copy tables */ + { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) + ? ((size_t)1 << cdict_cParams->chainLog) + : 0; + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + + ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize, cdict_cParams); + + /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ + if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { + ZSTD_copyCDictTableIntoCCtx(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize, cdict_cParams); + } + /* copy tag table */ + if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { + size_t const tagTableSize = hSize; + ZSTD_memcpy(cctx->blockState.matchState.tagTable, + cdict->matchState.tagTable, + tagTableSize); + cctx->blockState.matchState.hashSalt = cdict->matchState.hashSalt; + } + } + + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. */ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e.
after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. */ + params.cParams = srcCCtx->appliedParams.cParams; + assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto); + assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto); + assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto); + params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; + params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter; + params.ldmParams = srcCCtx->appliedParams.ldmParams; + params.fParams = fParams; + params.maxBlockSize = srcCCtx->appliedParams.maxBlockSize; + ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + + /* copy tables */ + { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy, + srcCCtx->appliedParams.useRowMatchFinder, + 0 /* forDDSDict */) + ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) + : 0; + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*!
ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. + * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + /* Protect special index values < ZSTD_WINDOW_START_INDEX. */ + U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table reuse logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. 
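+ * The intended pairing is (sketch, assuming an MSan build) : + * ZSTD_cwksp_mark_tables_dirty() => poisons the table area, + * ZSTD_reduceTable_internal() => unpoisons it, then rewrites every cell, + * so reducing a table that was never marked dirty is still caught, but a + * stray read of genuinely poisoned memory inside this function is not.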
*/ + __msan_unpoison(table, size * sizeof(U32)); +#endif + + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; column<ZSTD_ROWSIZE; column++) { + U32 newVal; + if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) { + /* This write is pointless, but is required(?) for the compiler + * to auto-vectorize the loop. */ + newVal = ZSTD_DUBT_UNSORTED_MARK; + } else if (table[cellNb] < reducerThreshold) { + newVal = 0; + } else { + newVal = table[cellNb] - reducerValue; + } + table[cellNb] = newVal; + cellNb++; + } } +} + +static void ZSTD_reduceTable(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 0); +} + +static void ZSTD_reduceTable_btlazy2(U32* const table, U32 const size, U32 const reducerValue) +{ + ZSTD_reduceTable_internal(table, size, reducerValue, 1); +} + +/*! ZSTD_reduceIndex() : +* rescale all indexes to avoid future overflow (indexes are U32) */ +static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) +{ + { U32 const hSize = (U32)1 << params->cParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +int ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + int longOffsets = 0; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; u<nbSeq; u++) { + U32 const llv = sequences[u].litLength; + U32 const ofCode = ZSTD_highbit32(sequences[u].offBase); + U32 const mlv = sequences[u].mlBase; + llCodeTable[u] = (BYTE)ZSTD_LLcode(llv); + ofCodeTable[u] = (BYTE)ofCode; + mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv); + assert(!(MEM_64bits() && ofCode >= STREAM_ACCUMULATOR_MIN)); + if (MEM_32bits() && ofCode >= STREAM_ACCUMULATOR_MIN) + longOffsets = 1; + } + if (seqStorePtr->longLengthType==ZSTD_llt_literalLength) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; + return longOffsets; +} + +/* ZSTD_useTargetCBlockSize(): + * Returns whether the target compressed block size param is being used. + * If used, compression will make a best effort to produce compressed blocks of around targetCBlockSize. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} + +/* ZSTD_blockSplitterEnabled(): + * Returns whether the block splitting param is being used. + * If used, compression will make a best effort to split blocks in order to improve the compression ratio. + * At the time this function is called, the parameter must be finalized. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter); + assert(cctxParams->useBlockSplitter != ZSTD_ps_auto); + return (cctxParams->useBlockSplitter == ZSTD_ps_enable); +} + +/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types + * and size of the sequences statistics + */ +typedef struct { + U32 LLtype; + U32 Offtype; + U32 MLtype; + size_t size; + size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ + int longOffsets; +} ZSTD_symbolEncodingTypeStats_t; + +/* ZSTD_buildSequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. + * Modifies `nextEntropy` to have the appropriate values as a side effect. + * nbSeq must be greater than 0.
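+ * Typical call pattern (illustrative sketch, mirroring the use further below) : + * ZSTD_symbolEncodingTypeStats_t s = ZSTD_buildSequencesStatistics(...); + * if (ZSTD_isError(s.size)) return s.size; => errors travel in the .size field + * seqHead = (BYTE)((s.LLtype<<6) + (s.Offtype<<4) + (s.MLtype<<2));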
+ * + * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildSequencesStatistics( + const seqStore_t* seqStorePtr, size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, unsigned* countWorkspace, + void* entropyWorkspace, size_t entropyWkspSize) +{ + BYTE* const ostart = dst; + const BYTE* const oend = dstEnd; + BYTE* op = ostart; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + ZSTD_symbolEncodingTypeStats_t stats; + + stats.lastCountSize = 0; + /* convert length/distances into codes */ + stats.longOffsets = ZSTD_seqToCodes(seqStorePtr); + assert(op <= oend); + assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, + countWorkspace, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, + sizeof(prevEntropy->litlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed"); + stats.size = countSize; + return stats; + } + if (stats.LLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? 
ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, + countWorkspace, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, + sizeof(prevEntropy->offcodeCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed"); + stats.size = countSize; + return stats; + } + if (stats.Offtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, + countWorkspace, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, + sizeof(prevEntropy->matchlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed"); + stats.size = countSize; + return stats; + } + if (stats.MLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + stats.size = (size_t)(op-ostart); + return stats; +} + +/* ZSTD_entropyCompressSeqStore_internal(): + * compresses both literals and sequences + * Returns compressed size of block, or a zstd error. 
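+ * note : the SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO defined below gates a + * cheap pre-check in the literals stage : with fewer than one sequence per + * 20 literals (e.g. 4000 literals for 100 sequences; hypothetical numbers) + * the block looks match-poor, and the Huffman stage samples the literals + * first so it can bail out early if they appear incompressible.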
+ */ +#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20 +MEM_STATIC size_t +ZSTD_entropyCompressSeqStore_internal( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned* count = (unsigned*)entropyWorkspace; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t lastCountSize; + int longOffsets = 0; + + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(5, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu, dstCapacity=%zu)", nbSeq, dstCapacity); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); + assert(entropyWkspSize >= HUF_WORKSPACE_SIZE); + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const numSequences = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + size_t const numLiterals = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); + /* Base suspicion of uncompressibility on ratio of literals to sequences */ + unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO); + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + + size_t const cSize = ZSTD_compressLiterals( + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_literalsCompressionIsDisabled(cctxParams), + suspectUncompressible, bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + op += cSize; + } + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return (size_t)(op - ostart); + } + { BYTE* const seqHead = op++; + /* build stats for sequences */ + const ZSTD_symbolEncodingTypeStats_t stats = + ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &prevEntropy->fse, &nextEntropy->fse, + op, oend, + strategy, count, + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); + lastCountSize = stats.lastCountSize; + op += stats.size; + longOffsets = stats.longOffsets; + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), +
CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + assert(op <= oend); + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { + /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } + + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return (size_t)(op - ostart); +} + +MEM_STATIC size_t +ZSTD_entropyCompressSeqStore( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) +{ + size_t const cSize = ZSTD_entropyCompressSeqStore_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + entropyWorkspace, entropyWkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, the block must not be compressible, so fall back to a raw uncompressed block. + */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) { + DEBUGLOG(4, "not enough dstCapacity (%zu) for ZSTD_entropyCompressSeqStore_internal()=> do not compress block", dstCapacity); + return 0; /* block not compressed */ + } + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + DEBUGLOG(5, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); + /* libzstd decoders up to and including v1.5.4 are not compatible with compressed blocks of size ZSTD_BLOCKSIZE_MAX exactly. + * This restriction is indirectly already fulfilled by respecting ZSTD_minGain() condition above.
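+ * Concretely : maxCSize above is srcSize - minGain, so even for a full + * 128 KB input the surviving cSize stays strictly below ZSTD_BLOCKSIZE_MAX, + * which the assert just below double-checks.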
+ */ + assert(cSize < ZSTD_BLOCKSIZE_MAX); + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_COMPRESSBLOCK_DOUBLEFAST, + ZSTD_COMPRESSBLOCK_GREEDY, + ZSTD_COMPRESSBLOCK_LAZY, + ZSTD_COMPRESSBLOCK_LAZY2, + ZSTD_COMPRESSBLOCK_BTLAZY2, + ZSTD_COMPRESSBLOCK_BTOPT, + ZSTD_COMPRESSBLOCK_BTULTRA, + ZSTD_COMPRESSBLOCK_BTULTRA2 + }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT, + ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT, + ZSTD_COMPRESSBLOCK_LAZY_EXTDICT, + ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT, + ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT, + ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT, + ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT, + ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT + }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE, + ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE + }, + { NULL /* default for 0 */, + NULL, + NULL, + ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH, + ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH, + ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH, + NULL, + NULL, + NULL, + NULL } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); + if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { + static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { + { + ZSTD_COMPRESSBLOCK_GREEDY_ROW, + ZSTD_COMPRESSBLOCK_LAZY_ROW, + ZSTD_COMPRESSBLOCK_LAZY2_ROW + }, + { + ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW, + ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW, + ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW + }, + { + ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW, + ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW, + ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW + }, + { + ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW, + ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW, + ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW + } + }; + DEBUGLOG(4, "Selecting a row-based matchfinder"); + assert(useRowMatchFinder != ZSTD_ps_auto); + selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; + } else { + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + } + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthType = ZSTD_llt_none; +} + +/* 
ZSTD_postProcessSequenceProducerResult() : + * Validates and post-processes sequences obtained through the external matchfinder API: + * - Checks whether nbExternalSeqs represents an error condition. + * - Appends a block delimiter to outSeqs if one is not already present. + * See zstd.h for context regarding block delimiters. + * Returns the number of sequences after post-processing, or an error code. */ +static size_t ZSTD_postProcessSequenceProducerResult( + ZSTD_Sequence* outSeqs, size_t nbExternalSeqs, size_t outSeqsCapacity, size_t srcSize +) { + RETURN_ERROR_IF( + nbExternalSeqs > outSeqsCapacity, + sequenceProducer_failed, + "External sequence producer returned error code %lu", + (unsigned long)nbExternalSeqs + ); + + RETURN_ERROR_IF( + nbExternalSeqs == 0 && srcSize > 0, + sequenceProducer_failed, + "Got zero sequences from external sequence producer for a non-empty src buffer!" + ); + + if (srcSize == 0) { + ZSTD_memset(&outSeqs[0], 0, sizeof(ZSTD_Sequence)); + return 1; + } + + { + ZSTD_Sequence const lastSeq = outSeqs[nbExternalSeqs - 1]; + + /* We can return early if lastSeq is already a block delimiter. */ + if (lastSeq.offset == 0 && lastSeq.matchLength == 0) { + return nbExternalSeqs; + } + + /* This error condition is only possible if the external matchfinder + * produced an invalid parse, by definition of ZSTD_sequenceBound(). */ + RETURN_ERROR_IF( + nbExternalSeqs == outSeqsCapacity, + sequenceProducer_failed, + "nbExternalSeqs == outSeqsCapacity but lastSeq is not a block delimiter!" + ); + + /* lastSeq is not a block delimiter, so we need to append one. */ + ZSTD_memset(&outSeqs[nbExternalSeqs], 0, sizeof(ZSTD_Sequence)); + return nbExternalSeqs + 1; + } +} + +/* ZSTD_fastSequenceLengthSum() : + * Returns sum(litLen) + sum(matchLen) + lastLits for *seqBuf*. + * Similar to another function in zstd_compress.c (determine_blockSize), + * except it doesn't check for a block delimiter to end summation. + * Removing the early exit allows the compiler to auto-vectorize (https://godbolt.org/z/cY1cajz9P). + * This function can be deleted and replaced by determine_blockSize after we resolve issue #3456. */ +static size_t ZSTD_fastSequenceLengthSum(ZSTD_Sequence const* seqBuf, size_t seqBufSize) { + size_t matchLenSum, litLenSum, i; + matchLenSum = 0; + litLenSum = 0; + for (i = 0; i < seqBufSize; i++) { + litLenSum += seqBuf[i].litLength; + matchLenSum += seqBuf[i].matchLength; + } + return litLenSum + matchLenSum; +} + +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. 
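+ * With the current constants the cutoff below works out to + * MIN_CBLOCK_SIZE + ZSTD_blockHeaderSize + 1 + 1 = 2 + 3 + 1 + 1 = 7 bytes, + * under which the block is emitted raw without attempting compression.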
We need to revisit and change this logic to be more consistent */ + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 curr = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); + } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable); + + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + ZSTD_hasExtSeqProd(&zc->appliedParams), + parameter_combination_unsupported, + "Long-distance matching with external sequence producer enabled is not currently supported." + ); + + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) { + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + + /* External matchfinder + LDM is technically possible, just not implemented yet. + * We need to revisit soon and implement it. */ + RETURN_ERROR_IF( + ZSTD_hasExtSeqProd(&zc->appliedParams), + parameter_combination_unsupported, + "Long-distance matching with external sequence producer enabled is not currently supported." 
+ ); + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) { + assert( + zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize) + ); + assert(zc->appliedParams.extSeqProdFunc != NULL); + + { U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog; + + size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)( + zc->appliedParams.extSeqProdState, + zc->extSeqBuf, + zc->extSeqBufCapacity, + src, srcSize, + NULL, 0, /* dict and dictSize, currently not supported */ + zc->appliedParams.compressionLevel, + windowSize + ); + + size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult( + zc->extSeqBuf, + nbExternalSeqs, + zc->extSeqBufCapacity, + srcSize + ); + + /* Return early if there is no error, since we don't need to worry about last literals */ + if (!ZSTD_isError(nbPostProcessedSeqs)) { + ZSTD_sequencePosition seqPos = {0,0,0}; + size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs); + RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!"); + FORWARD_IF_ERROR( + ZSTD_copySequencesToSeqStoreExplicitBlockDelim( + zc, &seqPos, + zc->extSeqBuf, nbPostProcessedSeqs, + src, srcSize, + zc->appliedParams.searchForExternalRepcodes + ), + "Failed to copy external sequences to seqStore!" + ); + ms->ldmSeqStore = NULL; + DEBUGLOG(5, "Copied %lu sequences from external sequence producer to internal seqStore.", (unsigned long)nbExternalSeqs); + return ZSTDbss_compress; + } + + /* Propagate the error if fallback is disabled */ + if (!zc->appliedParams.enableMatchFinderFallback) { + return nbPostProcessedSeqs; + } + + /* Fallback to software matchfinder */ + { ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor( + zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + DEBUGLOG( + 5, + "External sequence producer returned error code %lu. Falling back to internal parser.", + (unsigned long)nbExternalSeqs + ); + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } } + } else { /* not long range mode and no external matchfinder */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor( + zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} + +static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM]) +{ + const seqDef* inSeqs = seqStore->sequencesStart; + const size_t nbInSequences = seqStore->sequences - inSeqs; + const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart); + + ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? 
seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex; + const size_t nbOutSequences = nbInSequences + 1; + size_t nbOutLiterals = 0; + repcodes_t repcodes; + size_t i; + + /* Bounds check that we have enough space for every input sequence + * and the block delimiter + */ + assert(seqCollector->seqIndex <= seqCollector->maxSequences); + RETURN_ERROR_IF( + nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex), + dstSize_tooSmall, + "Not enough space to copy sequences"); + + ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes)); + for (i = 0; i < nbInSequences; ++i) { + U32 rawOffset; + outSeqs[i].litLength = inSeqs[i].litLength; + outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH; + outSeqs[i].rep = 0; + + /* Handle the possible single length >= 64K + * There can only be one because we add MINMATCH to every match length, + * and blocks are at most 128K. + */ + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { + outSeqs[i].matchLength += 0x10000; + } + } + + /* Determine the raw offset given the offBase, which may be a repcode. */ + if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) { + const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase); + assert(repcode > 0); + outSeqs[i].rep = repcode; + if (outSeqs[i].litLength != 0) { + rawOffset = repcodes.rep[repcode - 1]; + } else { + if (repcode == 3) { + assert(repcodes.rep[0] > 1); + rawOffset = repcodes.rep[0] - 1; + } else { + rawOffset = repcodes.rep[repcode]; + } + } + } else { + rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase); + } + outSeqs[i].offset = rawOffset; + + /* Update repcode history for the sequence */ + ZSTD_updateRep(repcodes.rep, + inSeqs[i].offBase, + inSeqs[i].litLength == 0); + + nbOutLiterals += outSeqs[i].litLength; + } + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. 
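+ * Example (hypothetical input) : "abcabcabcXY" could produce one real + * sequence (ll: 3, ml: 6, of: 3) followed by the terminator + * (of: 0, ml: 0, ll: 2) carrying the two trailing literals "XY".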
+ */ + assert(nbInLiterals >= nbOutLiterals); + { + const size_t lastLLSize = nbInLiterals - nbOutLiterals; + outSeqs[nbInSequences].litLength = (U32)lastLLSize; + outSeqs[nbInSequences].matchLength = 0; + outSeqs[nbInSequences].offset = 0; + assert(nbOutSequences == nbInSequences + 1); + } + seqCollector->seqIndex += nbOutSequences; + assert(seqCollector->seqIndex <= seqCollector->maxSequences); + + return 0; +} + +size_t ZSTD_sequenceBound(size_t srcSize) { + const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1; + const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1; + return maxNbSeq + maxNbDelims; +} + +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + { + int targetCBlockSize; + FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), ""); + RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0"); + } + { + int nbWorkers; + FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), ""); + RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0"); + } + + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + { + const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_customFree(dst, ZSTD_defaultCMem); + FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed"); + } + assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize)); + return zc->seqCollector.seqIndex; +} + +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + +/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */ +static int ZSTD_isRLE(const BYTE* src, size_t length) { + const BYTE* ip = src; + const BYTE value = ip[0]; + const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); + const size_t unrollSize = sizeof(size_t) * 4; + const size_t unrollMask = unrollSize - 1; + const size_t prefixLength = length & unrollMask; + size_t i; + if (length == 1) return 1; + /* Check if prefix is RLE first before using unrolled loop */ + if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { + return 0; + } + for (i = prefixLength; i != length; i += unrollSize) { + size_t u; + for (u = 0; u < unrollSize; u += sizeof(size_t)) { + if (MEM_readST(ip + i + u) != valueST) { + return 0; + } } } + return 1; +} + +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. 
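+ * The thresholds below (nbSeqs < 4, nbLits < 10) are deliberately loose : + * a genuinely RLE block leaves almost no literals and very few sequences, + * and the exact byte-level check is left to ZSTD_isRLE() on the raw input.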
+ */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void +ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) +{ + ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; + bs->prevCBlock = bs->nextCBlock; + bs->nextCBlock = tmp; +} + +/* Writes the block header */ +static void +writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) +{ + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); +} + +/** ZSTD_buildBlockEntropyStats_literals() : + * Builds entropy for the literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * Requires ENTROPY_WORKSPACE_SIZE workspace + * @return : size of huffman description table, or an error code + */ +static size_t +ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int literalsCompressionIsDisabled, + void* workspace, size_t wkspSize, + int hufFlags) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = (size_t)(wkspEnd - nodeWksp); + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + unsigned huffLog = LitHufLog; + HUF_repeat repeat = prevHuf->repeatMode; + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (literalsCompressionIsDisabled) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +#ifndef COMPRESS_LITERALS_SIZE_MIN +# define COMPRESS_LITERALS_SIZE_MIN 63 /* heuristic */ +#endif + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } } + + /* Scan input and build symbol stats */ + { size_t const largest = + HIST_count_wksp (countWksp, &maxSymbolValue, + (const BYTE*)src, srcSize, + workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + /* only one literal symbol */ + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + /* heuristic: likely not compressible */ + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check + && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, nodeWksp, nodeWkspSize, nextHuf->CTable, countWksp, hufFlags); + assert(huffLog <= LitHufLog); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + } + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } +} + + +/* ZSTD_buildDummySequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, + * and updates nextEntropy to the appropriate repeatMode. + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) +{ + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0, 0}; + nextEntropy->litlength_repeatMode = FSE_repeat_none; + nextEntropy->offcode_repeatMode = FSE_repeat_none; + nextEntropy->matchlength_repeatMode = FSE_repeat_none; + return stats; +} + +/** ZSTD_buildBlockEntropyStats_sequences() : + * Builds entropy for the sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * Requires ENTROPY_WORKSPACE_SIZE wksp. 
+ * @return : size of fse tables or error code */ +static size_t +ZSTD_buildBlockEntropyStats_sequences( + const seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + unsigned* countWorkspace = (unsigned*)workspace; + unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); + size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); + ZSTD_symbolEncodingTypeStats_t stats; + + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); + stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + prevEntropy, nextEntropy, op, oend, + strategy, countWorkspace, + entropyWorkspace, entropyWorkspaceSize) + : ZSTD_buildDummySequencesStatistics(nextEntropy); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; + fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; + fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; + fseMetadata->lastCountSize = stats.lastCountSize; + return stats.size; +} + + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. + * Requires workspace size ENTROPY_WORKSPACE_SIZE + * @return : 0 on success, or an error code + * Note : also employed in superblock + */ +size_t ZSTD_buildBlockEntropyStats( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = (size_t)(seqStorePtr->lit - seqStorePtr->litStart); + int const huf_useOptDepth = (cctxParams->cParams.strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD); + int const hufFlags = huf_useOptDepth ? 
HUF_flags_optimalDepth : 0; + + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_literalsCompressionIsDisabled(cctxParams), + workspace, wkspSize, hufFlags); + + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); + return 0; +} + +/* Returns the size estimate for the literals section (header + content) of a block */ +static size_t +ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + U32 singleStream = litSize < 256; + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ +static size_t +ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, + const FSE_CTable* fseCTable, + const U8* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. 
*/ + assert(max <= defaultMax); + (void)defaultMax; + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { + return nbSeq * 10; + } + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits >> 3; +} + +/* Returns the size estimate for the sequences section (header + content) of a block */ +static size_t +ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, + fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, + fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, + fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +/* Returns the size estimate for a given stream of literals, of, ll, ml */ +static size_t +ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) +{ + size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return seqSize + literalsSize + ZSTD_blockHeaderSize; +} + +/* Builds entropy statistics and uses them for blocksize estimation. + * + * @return: estimated compressed size of the seqStore, or a zstd error. 
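+ *
+ * The estimate is the sum of the literals section, the sequences section,
+ * and the fixed 3-byte block header (see ZSTD_estimateBlockSize() above).
+ * For example, a chunk estimated at 1000 literals-section bytes and 200
+ * sequences-section bytes is reported as 1000 + 200 + 3 == 1203 bytes.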
+ */ +static size_t +ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) +{ + ZSTD_entropyCTablesMetadata_t* const entropyMetadata = &zc->blockSplitCtx.entropyMetadata; + DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()"); + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE), ""); + return ZSTD_estimateBlockSize( + seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + seqStore->ofCode, seqStore->llCode, seqStore->mlCode, + (size_t)(seqStore->sequences - seqStore->sequencesStart), + &zc->blockState.nextCBlock->entropy, + entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1); +} + +/* Returns literals bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) +{ + size_t literalsBytes = 0; + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef const seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { + literalsBytes += 0x10000; + } } + return literalsBytes; +} + +/* Returns match bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) +{ + size_t matchBytes = 0; + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.mlBase + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { + matchBytes += 0x10000; + } } + return matchBytes; +} + +/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). + * Stores the result in resultSeqStore. 
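+ * The chunk shares the parent's buffers : the sequence, literal and code-table
+ * pointers are merely advanced past the entries preceding startIdx, and
+ * longLengthPos is re-based (or cleared) so the 0x10000 long-length correction
+ * still applies to the right sequence.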
+ */ +static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, + const seqStore_t* originalSeqStore, + size_t startIdx, size_t endIdx) +{ + *resultSeqStore = *originalSeqStore; + if (startIdx > 0) { + resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; + resultSeqStore->litStart += ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + } + + /* Move longLengthPos into the correct position if necessary */ + if (originalSeqStore->longLengthType != ZSTD_llt_none) { + if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { + resultSeqStore->longLengthType = ZSTD_llt_none; + } else { + resultSeqStore->longLengthPos -= (U32)startIdx; + } + } + resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; + resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; + if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { + /* This accounts for possible last literals if the derived chunk reaches the end of the block */ + assert(resultSeqStore->lit == originalSeqStore->lit); + } else { + size_t const literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + resultSeqStore->lit = resultSeqStore->litStart + literalsBytes; + } + resultSeqStore->llCode += startIdx; + resultSeqStore->mlCode += startIdx; + resultSeqStore->ofCode += startIdx; +} + +/** + * Returns the raw offset represented by the combination of offBase, ll0, and repcode history. + * offBase must represent a repcode in the numeric representation of ZSTD_storeSeq(). + */ +static U32 +ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offBase, const U32 ll0) +{ + U32 const adjustedRepCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; /* [ 0 - 3 ] */ + assert(OFFBASE_IS_REPCODE(offBase)); + if (adjustedRepCode == ZSTD_REP_NUM) { + assert(ll0); + /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 + * This is only valid if it results in a valid offset value, aka > 0. + * Note : it may happen that `rep[0]==1` in exceptional circumstances. + * In which case this function will return 0, which is an invalid offset. + * It's not an issue though, since this value will be + * compared and discarded within ZSTD_seqStore_resolveOffCodes(). + */ + return rep[0] - 1; + } + return rep[adjustedRepCode]; +} + +/** + * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise + * due to emission of RLE/raw blocks that disturb the offset history, + * and replaces any repcodes within the seqStore that may be invalid. + * + * dRepcodes are updated as would be on the decompression side. + * cRepcodes are updated exactly in accordance with the seqStore. + * + * Note : this function assumes seq->offBase respects the following numbering scheme : + * 0 : invalid + * 1-3 : repcode 1-3 + * 4+ : real_offset+3 + */ +static void +ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, + const seqStore_t* const seqStore, U32 const nbSeq) +{ + U32 idx = 0; + U32 const longLitLenIdx = seqStore->longLengthType == ZSTD_llt_literalLength ? 
seqStore->longLengthPos : nbSeq; + for (; idx < nbSeq; ++idx) { + seqDef* const seq = seqStore->sequencesStart + idx; + U32 const ll0 = (seq->litLength == 0) && (idx != longLitLenIdx); + U32 const offBase = seq->offBase; + assert(offBase > 0); + if (OFFBASE_IS_REPCODE(offBase)) { + U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offBase, ll0); + U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offBase, ll0); + /* Adjust simulated decompression repcode history if we come across a mismatch. Replace + * the repcode with the offset it actually references, determined by the compression + * repcode history. + */ + if (dRawOffset != cRawOffset) { + seq->offBase = OFFSET_TO_OFFBASE(cRawOffset); + } + } + /* Compression repcode history is always updated with values directly from the unmodified seqStore. + * Decompression repcode history may use modified seq->offset value taken from compression repcode history. + */ + ZSTD_updateRep(dRepcodes->rep, seq->offBase, ll0); + ZSTD_updateRep(cRepcodes->rep, offBase, ll0); + } +} + +/* ZSTD_compressSeqStore_singleBlock(): + * Compresses a seqStore into a block with a block header, into the buffer dst. + * + * Returns the total size of that block (including header) or a ZSTD error code. + */ +static size_t +ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, + const seqStore_t* const seqStore, + repcodes_t* const dRep, repcodes_t* const cRep, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock, U32 isPartition) +{ + const U32 rleMaxLength = 25; + BYTE* op = (BYTE*)dst; + const BYTE* ip = (const BYTE*)src; + size_t cSize; + size_t cSeqsSize; + + /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ + repcodes_t const dRepOriginal = *dRep; + DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock"); + if (isPartition) + ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit"); + cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); + + if (!zc->isFirstBlock && + cSeqsSize < rleMaxLength && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
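+ * (such first blocks are emitted through the regular compressed/raw path instead).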
+ * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSize = 1; + } + + /* Sequence collection not supported when block splitting */ + if (zc->seqCollector.collectSequences) { + FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed"); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + if (cSeqsSize == 0) { + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else if (cSeqsSize == 1) { + cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); + cSize = ZSTD_blockHeaderSize + cSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); + } + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +/* Struct to keep track of where we are in our recursive calls. */ +typedef struct { + U32* splitLocations; /* Array of split indices */ + size_t idx; /* The current index within splitLocations being worked on */ +} seqStoreSplits; + +#define MIN_SEQUENCES_BLOCK_SPLITTING 300 + +/* Helper function to perform the recursive search for block splits. + * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. + * If advantageous to split, then we recurse down the two sub-blocks. + * If not, or if an error occurred in estimation, then we do not recurse. + * + * Note: The recursion depth is capped by a heuristic minimum number of sequences, + * defined by MIN_SEQUENCES_BLOCK_SPLITTING. + * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * In practice, recursion depth usually doesn't go beyond 4. + * + * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. + * At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize + * maximum of 128 KB, this value is actually impossible to reach. 
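+ *
+ * Note that every candidate split point is the midpoint of the current range,
+ * so partitions are derived by recursive halving rather than by a free-form
+ * search over boundaries.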
+ */ +static void +ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, + ZSTD_CCtx* zc, const seqStore_t* origSeqStore) +{ + seqStore_t* const fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk; + seqStore_t* const firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore; + seqStore_t* const secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore; + size_t estimatedOriginalSize; + size_t estimatedFirstHalfSize; + size_t estimatedSecondHalfSize; + size_t midIdx = (startIdx + endIdx)/2; + + DEBUGLOG(5, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx); + assert(endIdx >= startIdx); + if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) { + DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences (%zu)", endIdx - startIdx); + return; + } + ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx); + ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc); + DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", + estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); + if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { + return; + } + if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { + DEBUGLOG(5, "split decided at seqNb:%zu", midIdx); + ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); + splits->splitLocations[splits->idx] = (U32)midIdx; + splits->idx++; + ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); + } +} + +/* Base recursive function. + * Populates a table with intra-block partition indices that can improve compression ratio. + * + * @return: number of splits made (which equals the size of the partition table - 1). + */ +static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) +{ + seqStoreSplits splits; + splits.splitLocations = partitions; + splits.idx = 0; + if (nbSeq <= 4) { + DEBUGLOG(5, "ZSTD_deriveBlockSplits: Too few sequences to split (%u <= 4)", nbSeq); + /* Refuse to try and split anything with less than 4 sequences */ + return 0; + } + ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); + splits.splitLocations[splits.idx] = nbSeq; + DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1); + return splits.idx; +} + +/* ZSTD_compressBlock_splitBlock(): + * Attempts to split a given block into multiple blocks to improve compression ratio. + * + * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 
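+ *
+ * Blocks holding 4 or fewer sequences are never split (see ZSTD_deriveBlockSplits()),
+ * and uncompressible inputs short-circuit to a single raw (no-compress) block.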
+ */ +static size_t +ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t blockSize, + U32 lastBlock, U32 nbSeq) +{ + size_t cSize = 0; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + size_t i = 0; + size_t srcBytesTotal = 0; + U32* const partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */ + seqStore_t* const nextSeqStore = &zc->blockSplitCtx.nextSeqStore; + seqStore_t* const currSeqStore = &zc->blockSplitCtx.currSeqStore; + size_t const numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + + /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history + * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two + * separate repcode histories that simulate repcode history on compression and decompression side, + * and use the histories to determine whether we must replace a particular repcode with its raw offset. + * + * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed + * or RLE. This allows us to retrieve the offset value that an invalid repcode references within + * a nocompress/RLE block. + * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use + * the replacement offset value rather than the original repcode to update the repcode history. + * dRep also will be the final repcode history sent to the next block. + * + * See ZSTD_seqStore_resolveOffCodes() for more details. + */ + repcodes_t dRep; + repcodes_t cRep; + ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t)); + + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + if (numSplits == 0) { + size_t cSizeSingleBlock = + ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, + &dRep, &cRep, + op, dstCapacity, + ip, blockSize, + lastBlock, 0 /* isPartition */); + FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); + assert(zc->blockSize <= ZSTD_BLOCKSIZE_MAX); + assert(cSizeSingleBlock <= zc->blockSize + ZSTD_blockHeaderSize); + return cSizeSingleBlock; + } + + ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]); + for (i = 0; i <= numSplits; ++i) { + size_t cSizeChunk; + U32 const lastPartition = (i == numSplits); + U32 lastBlockEntireSrc = 0; + + size_t srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore); + srcBytesTotal += srcBytes; + if (lastPartition) { + /* This is the final partition, need to account for possible last literals */ + srcBytes += blockSize - srcBytesTotal; + lastBlockEntireSrc = lastBlock; + } else { + ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]); + } + + cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore, + &dRep, &cRep, + op, dstCapacity, + ip, srcBytes, + lastBlockEntireSrc, 1 /* isPartition */); + DEBUGLOG(5, "Estimated size: %zu vs %zu : actual size", + ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk); + FORWARD_IF_ERROR(cSizeChunk, 
"Compressing chunk failed!"); + + ip += srcBytes; + op += cSizeChunk; + dstCapacity -= cSizeChunk; + cSize += cSizeChunk; + *currSeqStore = *nextSeqStore; + assert(cSizeChunk <= zc->blockSize + ZSTD_blockHeaderSize); + } + /* cRep and dRep may have diverged during the compression. + * If so, we use the dRep repcodes for the next block. + */ + ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); + return cSize; +} + +static size_t +ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) +{ + U32 nbSeq; + size_t cSize; + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); + assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); + cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); + return cSize; + } + nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); + } + + cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); + FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); + return cSize; +} + +static size_t +ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) +{ + /* This is an estimated upper bound for the length of an rle block. + * This isn't the actual upper bound. + * Finding the real threshold needs further investigation. + */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block"); + cSize = 0; + goto out; + } + } + + if (zc->seqCollector.collectSequences) { + FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed"); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + /* encode sequences and literals */ + cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } + +out: + if (!ZSTD_isError(cSize) && cSize > 1) { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. + */ + { size_t const cSize = + ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = + srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return cSize; + } + } + } + } /* if (bss == ZSTDbss_compress)*/ + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. 
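+ * A raw block costs exactly blockBound(srcSize) == srcSize + ZSTD_blockHeaderSize
+ * bytes, so this fallback respects ZSTD_compressBound() by construction.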
+ */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const maxDist = (U32)1 << params->cParams.windowLog; + if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. +* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. 
We need to revisit and change this logic to be more consistent */ + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE + 1, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; + + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); + assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } + } /* if (ZSTD_useTargetCBlockSize(&cctx->appliedParams))*/ + + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? 
+                   (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0;  /* 0-3 */
+    BYTE  const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
+    size_t pos=0;
+
+    assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
+    RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall,
+                    "dst buf is too small to fit worst-case frame header size.");
+    DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
+                !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode);
+    if (params->format == ZSTD_f_zstd1) {
+        MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
+        pos = 4;
+    }
+    op[pos++] = frameHeaderDescriptionByte;
+    if (!singleSegment) op[pos++] = windowLogByte;
+    switch(dictIDSizeCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : break;
+        case 1 : op[pos] = (BYTE)(dictID); pos++; break;
+        case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
+        case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
+    }
+    switch(fcsCode)
+    {
+        default:
+            assert(0); /* impossible */
+            ZSTD_FALLTHROUGH;
+        case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
+        case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
+        case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
+        case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
+    }
+    return pos;
+}
+
+/* ZSTD_writeSkippableFrame() :
+ * Writes out a skippable frame with the specified magic number variant (16 are supported),
+ * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data.
+ *
+ * Returns the total number of bytes written, or a ZSTD error code.
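+ *
+ * Resulting layout (all fields little-endian) :
+ *   bytes 0-3 : magic = ZSTD_MAGIC_SKIPPABLE_START + magicVariant
+ *   bytes 4-7 : (U32)srcSize
+ *   bytes 8.. : srcSize payload bytes, copied verbatim
+ * Illustrative call (hypothetical buffer and payload) :
+ *   unsigned char buf[64];
+ *   size_t const r = ZSTD_writeSkippableFrame(buf, sizeof(buf), "meta", 4, 0);
+ * On success r == 12 : ZSTD_SKIPPABLEHEADERSIZE (8) header bytes + 4 payload bytes.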
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
+                    dstSize_tooSmall, "Not enough room for skippable frame");
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ * or an error code if `dstCapacity` is too small (less than ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    {   U32 const cBlockHeader24 = 1 /* lastBlock */ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    assert(cctx->stage == ZSTDcs_init);
+    assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable);
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                                              void* dst, size_t dstCapacity,
+                                              const void* src, size_t srcSize,
+                                              U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+        ms->forceNonContiguous = 0;
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ?
"ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + +/* NOTE: Must just wrap ZSTD_compressContinue_public() */ +size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + return ZSTD_compressContinue_public(cctx, dst, dstCapacity, src, srcSize); +} + +static size_t ZSTD_getBlockSize_deprecated(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN(cctx->appliedParams.maxBlockSize, (size_t)1 << cParams.windowLog); +} + +/* NOTE: Must just wrap ZSTD_getBlockSize_deprecated() */ +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + return ZSTD_getBlockSize_deprecated(cctx); +} + +/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ +size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize_deprecated(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } + + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/* NOTE: Must just wrap ZSTD_compressBlock_deprecated() */ +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_deprecated(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm, + ZSTD_tableFillPurpose_e tfp) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL; + + /* Assert that the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); + + { /* Ensure large dictionaries can't cause index overflow */ + + /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. + * Dictionaries right at the edge will immediately trigger overflow + * correction, but I don't want to insert extra constraints here. 
+ */ + U32 maxDictSize = ZSTD_CURRENT_MAX - ZSTD_WINDOW_START_INDEX; + + int const CDictTaggedIndices = ZSTD_CDictIndicesAreTagged(¶ms->cParams); + if (CDictTaggedIndices && tfp == ZSTD_tfp_forCDict) { + /* Some dictionary matchfinders in zstd use "short cache", + * which treats the lower ZSTD_SHORT_CACHE_TAG_BITS of each + * CDict hashtable entry as a tag rather than as part of an index. + * When short cache is used, we need to truncate the dictionary + * so that its indices don't overlap with the tag. */ + U32 const shortCacheMaxDictSize = (1u << (32 - ZSTD_SHORT_CACHE_TAG_BITS)) - ZSTD_WINDOW_START_INDEX; + maxDictSize = MIN(maxDictSize, shortCacheMaxDictSize); + assert(!loadLdmDict); + } + + /* If the dictionary is too large, only load the suffix of the dictionary. */ + if (srcSize > maxDictSize) { + ip = iend - maxDictSize; + src = ip; + srcSize = maxDictSize; + } + } + + if (srcSize > ZSTD_CHUNKSIZE_MAX) { + /* We must have cleared our windows when our source is this large. */ + assert(ZSTD_window_isEmpty(ms->window)); + if (loadLdmDict) assert(ZSTD_window_isEmpty(ls->window)); + } + ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); + + DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); + + if (loadLdmDict) { /* Load the entire dict into LDM matchfinders. */ + ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); + ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); + ZSTD_ldm_fillHashTable(ls, ip, iend, ¶ms->ldmParams); + } + + /* If the dict is larger than we can reasonably index in our tables, only load the suffix. */ + if (params->cParams.strategy < ZSTD_btultra) { + U32 maxDictSize = 8U << MIN(MAX(params->cParams.hashLog, params->cParams.chainLog), 28); + if (srcSize > maxDictSize) { + ip = iend - maxDictSize; + src = ip; + srcSize = maxDictSize; + } + } + + ms->nextToUpdate = (U32)(ip - ms->window.base); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + ms->forceNonContiguous = params->deterministicRefPrefix; + + if (srcSize <= HASH_READ_SIZE) return 0; + + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend); + + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, dtlm, tfp); + break; + case ZSTD_dfast: +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp); +#else + assert(0); /* shouldn't be called: cparams should've been adjusted. */ +#endif + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) + assert(srcSize >= HASH_READ_SIZE); + if (ms->dedicatedDictSearch) { + assert(ms->chainTable != NULL); + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE); + } else { + assert(params->useRowMatchFinder != ZSTD_ps_auto); + if (params->useRowMatchFinder == ZSTD_ps_enable) { + size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog); + ZSTD_memset(ms->tagTable, 0, tagTableSize); + ZSTD_row_update(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using row-based hash table for lazy dict"); + } else { + ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE); + DEBUGLOG(4, "Using chain-based hash table for lazy dict"); + } + } +#else + assert(0); /* shouldn't be called: cparams should've been adjusted. 
 */
+#endif
+        break;
+
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+#else
+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that do show up cause problems
+ * during FSE encoding. Mark dictionaries with zero-probability symbols as FSE_repeat_check;
+ * only dictionaries in which every symbol has a non-zero probability can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) {
+        return FSE_repeat_check;
+    }
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) {
+            return FSE_repeat_check;
+        }
+    }
+    return FSE_repeat_valid;
+}
+
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize)
+{
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights.
Otherwise, we set it to check */ + if (!hasZeroWeights && maxSymbolValue == 255) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * 
https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format
+ */
+/*! ZSTD_loadZstdDictionary() :
+ * @return : dictID, or an error code
+ *  assumptions : magic number supposed already checked
+ *                dictSize supposed >= 8
+ */
+static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
+                                      ZSTD_matchState_t* ms,
+                                      ZSTD_cwksp* ws,
+                                      ZSTD_CCtx_params const* params,
+                                      const void* dict, size_t dictSize,
+                                      ZSTD_dictTableLoadMethod_e dtlm,
+                                      ZSTD_tableFillPurpose_e tfp,
+                                      void* workspace)
+{
+    const BYTE* dictPtr = (const BYTE*)dict;
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    size_t dictID;
+    size_t eSize;
+    ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
+    assert(dictSize >= 8);
+    assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
+
+    dictID = params->fParams.noDictIDFlag ? 0 :  MEM_readLE32(dictPtr + 4 /* skip magic number */ );
+    eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize);
+    FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed");
+    dictPtr += eSize;
+
+    {
+        size_t const dictContentSize = (size_t)(dictEnd - dictPtr);
+        FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(
+            ms, NULL, ws, params, dictPtr, dictContentSize, dtlm, tfp), "");
+    }
+    return dictID;
+}
+
+/** ZSTD_compress_insertDictionary() :
+*   @return : dictID, or an error code */
+static size_t
+ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
+                               ZSTD_matchState_t* ms,
+                               ldmState_t* ls,
+                               ZSTD_cwksp* ws,
+                               const ZSTD_CCtx_params* params,
+                               const void* dict, size_t dictSize,
+                               ZSTD_dictContentType_e dictContentType,
+                               ZSTD_dictTableLoadMethod_e dtlm,
+                               ZSTD_tableFillPurpose_e tfp,
+                               void* workspace)
+{
+    DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
+    if ((dict==NULL) || (dictSize<8)) {
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        return 0;
+    }
+
+    ZSTD_reset_compressedBlockState(bs);
+
+    /* dict restricted modes */
+    if (dictContentType == ZSTD_dct_rawContent)
+        return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm, tfp);
+
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
+        if (dictContentType == ZSTD_dct_auto) {
+            DEBUGLOG(4, "raw content dictionary detected");
+            return ZSTD_loadDictionaryContent(
+                ms, ls, ws, params, dict, dictSize, dtlm, tfp);
+        }
+        RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, "");
+        assert(0);   /* impossible */
+    }
+
+    /* dict as full zstd dictionary */
+    return ZSTD_loadZstdDictionary(
+        bs, ms, ws, params, dict, dictSize, dtlm, tfp, workspace);
+}
+
+#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB)
+#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL)
+
+/*! ZSTD_compressBegin_internal() :
+ * Assumption : either @dict OR @cdict (or none) is non-NULL, never both
+ * @return : 0, or an error code */
+static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
+                                    ZSTD_buffered_policy_e zbuff)
+{
+    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+#if ZSTD_TRACE
+    cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ?
ZSTD_trace_compress_begin(cctx) : 0; +#endif + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + dictContentSize, + ZSTDcrp_makeClean, zbuff) , ""); + { size_t const dictID = cdict ? + ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, cdict->dictContentType, dtlm, + ZSTD_tfp_forCCtx, cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, + dictContentType, dtlm, ZSTD_tfp_forCCtx, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= UINT_MAX); + cctx->dictID = (U32)dictID; + cctx->dictContentSize = dictContentSize; + } + return 0; +} + +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + &cctxParams, pledgedSrcSize); +} + +static size_t +ZSTD_compressBegin_usingDict_deprecated(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ?
ZSTD_CLEVEL_DEFAULT : compressionLevel); + } + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); +} + +size_t +ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict_deprecated(cctx, dict, dictSize, compressionLevel); +} + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict_deprecated(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. +* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3); + RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE24(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + trace.cctx = cctx; + ZSTD_trace_compress_end(cctx->traceCtx, &trace); + } + cctx->traceCtx = 0; +#else + (void)cctx; + (void)extraCSize; +#endif +} + +size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ 
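+        /* Editorial note (not upstream zstd text): pledgedSrcSizePlusOne stores the
+         * pledged size offset by one, so 0 means "nothing was pledged"; the static
+         * assert just below relies on ZSTD_CONTENTSIZE_UNKNOWN + 1 wrapping to 0,
+         * which is how an "unknown" pledge skips this verification. */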
+ ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + ZSTD_CCtx_trace(cctx, endResult); + return cSize + endResult; +} + +/* NOTE: Must just wrap ZSTD_compressEnd_public() */ +size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctx->simpleApiParams); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) , ""); + return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + } + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams); +} + +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + size_t result; +#if ZSTD_COMPRESS_HEAPMODE + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); + result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtx(cctx); +#else + ZSTD_CCtx ctxBody; + ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ +#endif + return result; +} + + +/* ===== Dictionary API ===== */ + +/*!
ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small + * in case we are using DDS with row-hash. */ + + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams), + /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params params) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(params.cParams)); + cdict->matchState.cParams = params.cParams; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictContent = dictBuffer; + } else { + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); + cdict->dictContent = internalBuffer; + ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; + + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + &params.cParams, + params.useRowMatchFinder, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict), ""); + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is < 8 bytes.
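+ * Editorial note (not upstream zstd text): a dictionary shorter than 8 bytes
+ * cannot hold the 4-byte magic number plus the 4-byte dictID, so
+ * ZSTD_compress_insertDictionary() treats it as "no dictionary" and returns 0;
+ * it only errors out when ZSTD_dct_fullDict explicitly requires a full one.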
+ */ + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, + &params, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, ZSTD_tfp_forCDict, cdict->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_compressionParameters cParams, + ZSTD_paramSwitch_e useRowMatchFinder, + U32 enableDedicatedDictSearch, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { + ZSTD_customFree(workspace, customMem); + return NULL; + } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + cdict->useRowMatchFinder = useRowMatchFinder; + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); + cctxParams.cParams = cParams;
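+    /* Editorial note (not upstream zstd text): the row match finder is resolved
+     * only after the dedicated-dict-search fallback above, so the decision is
+     * based on the final cParams that are actually stored into cctxParams. */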
+ cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, + customMem); + + if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_customFree(cdict, cMem); + } + return 0; + } +} + +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must be 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevant cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. + */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams); + /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ?
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + ZSTD_CCtx_params params; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + ZSTD_CCtxParams_init(&params, 0); + params.cParams = cParams; + params.useRowMatchFinder = useRowMatchFinder; + cdict->useRowMatchFinder = useRowMatchFinder; + cdict->compressionLevel = ZSTD_NO_CLEVEL; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + params) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; +} + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; + return cdict->dictID; +} + +/* ZSTD_compressBegin_usingCDict_internal() : + * Implementation of various ZSTD_compressBegin_usingCDict* functions. + */ +static size_t ZSTD_compressBegin_usingCDict_internal( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); + ZSTD_CCtxParams_init_internal(&cctxParams, &params, cdict->compressionLevel); + } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); +} + + +/* ZSTD_compressBegin_usingCDict_advanced() : + * This function is DEPRECATED.
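+ * Editorial note (not upstream zstd text): new code should prefer
+ * ZSTD_CCtx_refCDict() followed by ZSTD_compressStream2(); this entry point
+ * is kept for compatibility and simply forwards to the _internal variant above.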
+ * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); +} + +/* ZSTD_compressBegin_usingCDict() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + return ZSTD_compressBegin_usingCDict_deprecated(cctx, cdict); +} + +/*! ZSTD_compress_usingCDict_internal(): + * Implementation of various ZSTD_compress_usingCDict* functions. + */ +static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd_public(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict_advanced(): + * This function is DEPRECATED. + */ +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. 
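+ * Minimal usage sketch (editorial addition, not upstream zstd text; dictBuf,
+ * dictLen, dst, dstCapacity, src, srcLen and handleError() are placeholders,
+ * and most error paths are elided):
+ *     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictLen, 3);
+ *     ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
+ *     size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
+ *                                                   src, srcLen, cdict);
+ *     if (ZSTD_isError(cSize)) handleError(ZSTD_getErrorName(cSize));
+ *     ZSTD_freeCCtx(cctx);
+ *     ZSTD_freeCDict(cdict);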
+ * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) +{ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. 
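+ * Editorial note (not upstream zstd text): external callers should go through
+ * ZSTD_CCtx_reset() / ZSTD_CCtx_setPledgedSrcSize() / ZSTD_CCtx_loadDictionary()
+ * instead; this helper skips parameter validation and merely asserts that the
+ * caller already ran ZSTD_checkCParams().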
+ * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + } + return 0; +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) +{ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, &params); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN.
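+ * (Editorial note, not upstream zstd text: the identical pss==0 transition
+ * shim appears in ZSTD_resetCStream() above.)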
+ * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} + +/*====== Compression ======*/ + +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + return cctx->blockSize - cctx->stableIn_notConsumed; + } + assert(cctx->appliedParams.inBufferMode == ZSTD_bm_buffered); + { size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; + } +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * @return : hint size for next input to complete ongoing block */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (assert(input != NULL), (const char*)input->src); + const char* const iend = (istart != NULL) ? istart + input->size : istart; + const char* ip = (istart != NULL) ? istart + input->pos : istart; + char* const ostart = (assert(output != NULL), (char*)output->dst); + char* const oend = (ostart != NULL) ? ostart + output->size : ostart; + char* op = (ostart != NULL) ? 
ostart + output->pos : ostart; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%i, srcSize = %zu", (int)flushMode, input->size - input->pos); + assert(zcs != NULL); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) { + assert(input->pos >= zcs->stableIn_notConsumed); + input->pos -= zcs->stableIn_notConsumed; + if (ip) ip -= zcs->stableIn_notConsumed; + zcs->stableIn_notConsumed = 0; + } + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } + if (input->src == NULL) assert(input->size == 0); + assert(input->pos <= input->size); + if (output->dst == NULL) assert(output->size == 0); + assert(output->pos <= output->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd_public(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (ip) ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } else { + assert(zcs->appliedParams.inBufferMode == ZSTD_bm_stable); + if ( (flushMode == ZSTD_e_continue) + && ( (size_t)(iend - ip) < zcs->blockSize) ) { + /* can't compress a full block : stop here */ + zcs->stableIn_notConsumed = (size_t)(iend - ip); + ip = iend; /* pretend to have consumed input */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (ip == iend) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; + size_t cSize; + size_t oSize = oend-op; + size_t const iSize = inputBuffered ? 
zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd_public(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue_public(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { /* !inputBuffered, hence ZSTD_bm_stable */ + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip + iSize == iend); + cSize = lastBlock ? + ZSTD_compressEnd_public(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue_public(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (ip) ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) assert(ip == iend); + } + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + ZSTD_FALLTHROUGH; + case zcss_flush: + DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers >= 1) { + assert(cctx->mtctx != NULL); + return ZSTDMT_nextInputSizeHint(cctx->mtctx); + } +#endif + return ZSTD_nextInputSizeHint(cctx); + +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( 
ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void +ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, const ZSTD_outBuffer* output, const ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_setBufferExpectations (for advanced stable in/out modes)"); + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. + */ +static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, + ZSTD_outBuffer const* output, + ZSTD_inBuffer const* input, + ZSTD_EndDirective endOp) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + ZSTD_inBuffer const expect = cctx->expectedInBuffer; + if (expect.src != input->src || expect.pos != input->pos) + RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableInBuffer enabled but input differs!"); + } + (void)endOp; + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + size_t const outBufferSize = output->size - output->pos; + if (cctx->expectedOutBufferSize != outBufferSize) + RETURN_ERROR(stabilityCondition_notRespected, "ZSTD_c_stableOutBuffer enabled but output size differs!"); + } + return 0; +} + +static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, + ZSTD_EndDirective endOp, + size_t inSize) +{ + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + if (cctx->cdict && !cctx->localDict.cdict) { + /* Let the cdict's compression level take priority over the requested params. + * But do not take the cdict's compression level if the "cdict" is actually a localDict + * generated from ZSTD_initLocalDict(). + */ + params.compressionLevel = cctx->cdict->compressionLevel; + } + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-determine pledgedSrcSize */ + + { size_t const dictSize = prefixDict.dict + ? prefixDict.dictSize + : (cctx->cdict ? 
cctx->cdict->dictContentSize : 0); + ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, &params, cctx->pledgedSrcSizePlusOne - 1); + params.cParams = ZSTD_getCParamsFromCCtxParams( + &params, cctx->pledgedSrcSizePlusOne-1, + dictSize, mode); + } + + params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams); + params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams); + params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams); + params.validateSequences = ZSTD_resolveExternalSequenceValidation(params.validateSequences); + params.maxBlockSize = ZSTD_resolveMaxBlockSize(params.maxBlockSize); + params.searchForExternalRepcodes = ZSTD_resolveExternalRepcodeSearch(params.searchForExternalRepcodes, params.compressionLevel); + +#ifdef ZSTD_MULTITHREAD + /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */ + RETURN_ERROR_IF( + ZSTD_hasExtSeqProd(&params) && params.nbWorkers >= 1, + parameter_combination_unsupported, + "External sequence producer isn't supported with nbWorkers >= 1" + ); + + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? ZSTD_trace_compress_begin(cctx) : 0; +#endif + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ?
cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + cctx->streamStage = zcss_load; + cctx->appliedParams = params; + } else +#endif /* ZSTD_MULTITHREAD */ + { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, + cctx->cdict, + &params, pledgedSrcSize, + ZSTDb_buffered) , ""); + assert(cctx->appliedParams.nbWorkers == 0); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { + /* for small input: avoid automatic flush on reaching end of block, since + * it would require adding a 3-byte null block to end the frame + */ + cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + } else { + cctx->inBuffTarget = 0; + } + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + } + return 0; +} + +/* @return provides a minimum amount of data remaining to be flushed from internal buffers + */ +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); + RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); + assert(cctx != NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + size_t const inputSize = input->size - input->pos; /* no obligation to start from pos==0 */ + size_t const totalInputSize = inputSize + cctx->stableIn_notConsumed; + if ( (cctx->requestedParams.inBufferMode == ZSTD_bm_stable) /* input is presumed stable, across invocations */ + && (endOp == ZSTD_e_continue) /* no flush requested, more input to come */ + && (totalInputSize < ZSTD_BLOCKSIZE_MAX) ) { /* not even reached one block yet */ + if (cctx->stableIn_notConsumed) { /* not the first time */ + /* check stable source guarantees */ + RETURN_ERROR_IF(input->src != cctx->expectedInBuffer.src, stabilityCondition_notRespected, "stableInBuffer condition not respected: wrong src pointer"); + RETURN_ERROR_IF(input->pos != cctx->expectedInBuffer.size, stabilityCondition_notRespected, "stableInBuffer condition not respected: externally modified pos"); + } + /* pretend input was consumed, to give a sense of forward progress */ + input->pos = input->size; + /* save stable inBuffer, for later control, and flush/end */ + cctx->expectedInBuffer = *input; + /* but actually input wasn't consumed, so keep track of position from where compression shall resume */ + cctx->stableIn_notConsumed += inputSize; + /* don't initialize yet, wait for the first block of flush() order, for better parameters adaptation */ + return ZSTD_FRAMEHEADERSIZE_MIN(cctx->requestedParams.format); /* at least some header to produce */ + } + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, totalInputSize), "compressStream2 initialization failed"); + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ + } + /* end of transparent initialization stage */ + + FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output,
input, endOp), "invalid buffers"); + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + size_t flushMin; + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + if (cctx->stableIn_notConsumed) { + assert(cctx->appliedParams.inBufferMode == ZSTD_bm_stable); + /* some early data was skipped - make it available for consumption */ + assert(input->pos >= cctx->stableIn_notConsumed); + input->pos -= cctx->stableIn_notConsumed; + cctx->stableIn_notConsumed = 0; + } + for (;;) { + size_t const ipos = input->pos; + size_t const opos = output->pos; + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + } + FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); + + if (endOp == ZSTD_e_continue) { + /* We only require some progress with ZSTD_e_continue, not maximal progress. + * We're done if we've consumed or produced any bytes, or either buffer is + * full. + */ + if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size) + break; + } else { + assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + /* We require maximal progress. We're done when the flush is complete or the + * output buffer is full. + */ + if (flushMin == 0 || output->pos == output->size) + break; + } + } + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. + */ + assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size); + ZSTD_setBufferExpectations(cctx, output, input); + return flushMin; + } +#endif /* ZSTD_MULTITHREAD */ + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + ZSTD_setBufferExpectations(cctx, output, input); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output; + ZSTD_inBuffer input; + output.dst = dst; + output.size = dstCapacity; + output.pos = *dstPos; + input.src = src; + input.size = srcSize; + input.pos = *srcPos; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + { size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; + } +} + +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; + ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + /* Enable stable input/output buffers. 
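+ * Editorial note (not upstream zstd text): a single-shot call has fixed src
+ * and dst buffers for its whole duration, so both can be declared stable;
+ * this lets ZSTD_compressStream2() compress straight into dst and skip the
+ * internal staging buffers.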
*/ + cctx->requestedParams.inBufferMode = ZSTD_bm_stable; + cctx->requestedParams.outBufferMode = ZSTD_bm_stable; + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + /* Reset to the original values. */ + cctx->requestedParams.inBufferMode = originalInBufferMode; + cctx->requestedParams.outBufferMode = originalOutBufferMode; + + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall, ""); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} + +/* ZSTD_validateSequence() : + * @offCode : is presumed to follow format required by ZSTD_storeSeq() + * @returns a ZSTD error code if sequence is not valid + */ +static size_t +ZSTD_validateSequence(U32 offCode, U32 matchLength, U32 minMatch, + size_t posInSrc, U32 windowLog, size_t dictSize, int useSequenceProducer) +{ + U32 const windowSize = 1u << windowLog; + /* posInSrc represents the amount of data the decoder would decode up to this point. + * As long as the amount of data decoded is less than or equal to window size, offsets may be + * larger than the total length of output decoded in order to reference the dict, even larger than + * window size. After output surpasses windowSize, we're limited to windowSize offsets again. + */ + size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; + size_t const matchLenLowerBound = (minMatch == 3 || useSequenceProducer) ? 3 : 4; + RETURN_ERROR_IF(offCode > OFFSET_TO_OFFBASE(offsetBound), externalSequences_invalid, "Offset too large!"); + /* Validate maxNbSeq is large enough for the given matchLength and minMatch */ + RETURN_ERROR_IF(matchLength < matchLenLowerBound, externalSequences_invalid, "Matchlength too small for the minMatch"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffBase(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) +{ + U32 offBase = OFFSET_TO_OFFBASE(rawOffset); + + if (!ll0 && rawOffset == rep[0]) { + offBase = REPCODE1_TO_OFFBASE; + } else if (rawOffset == rep[1]) { + offBase = REPCODE_TO_OFFBASE(2 - ll0); + } else if (rawOffset == rep[2]) { + offBase = REPCODE_TO_OFFBASE(3 - ll0); + } else if (ll0 && rawOffset == rep[0] - 1) { + offBase = REPCODE3_TO_OFFBASE; + } + return offBase; +} + +size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize, + ZSTD_paramSwitch_e externalRepSearch) +{ + U32 idx = seqPos->idx; + U32 const startIdx = idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + + DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreExplicitBlockDelim (blockSize = %zu)", blockSize); + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; idx < inSeqsSize && (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0); ++idx) { + U32 const litLength = inSeqs[idx].litLength; + U32 
const matchLength = inSeqs[idx].matchLength; + U32 offBase; + + if (externalRepSearch == ZSTD_ps_disable) { + offBase = OFFSET_TO_OFFBASE(inSeqs[idx].offset); + } else { + U32 const ll0 = (litLength == 0); + offBase = ZSTD_finalizeOffBase(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); + } + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); + ip += matchLength + litLength; + } + + /* If we skipped repcode search while parsing, we need to update repcodes now */ + assert(externalRepSearch != ZSTD_ps_auto); + assert(idx >= startIdx); + if (externalRepSearch == ZSTD_ps_disable && idx != startIdx) { + U32* const rep = updatedRepcodes.rep; + U32 lastSeqIdx = idx - 1; /* index of last non-block-delimiter sequence */ + + if (lastSeqIdx >= startIdx + 2) { + rep[2] = inSeqs[lastSeqIdx - 2].offset; + rep[1] = inSeqs[lastSeqIdx - 1].offset; + rep[0] = inSeqs[lastSeqIdx].offset; + } else if (lastSeqIdx == startIdx + 1) { + rep[2] = rep[0]; + rep[1] = inSeqs[lastSeqIdx - 1].offset; + rep[0] = inSeqs[lastSeqIdx].offset; + } else { + assert(lastSeqIdx == startIdx); + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = inSeqs[lastSeqIdx].offset; + } + } + + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, externalSequences_invalid, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +size_t +ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch) +{ + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + + /* TODO(embg) support fast parsing mode in noBlockDelim mode */ + (void)externalRepSearch; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStoreNoBlockDelim: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, 
sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + U32 litLength = currSeq.litLength; + U32 matchLength = currSeq.matchLength; + U32 const rawOffset = currSeq.offset; + U32 offBase; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 const ll0 = (litLength == 0); + offBase = ZSTD_finalizeOffBase(rawOffset, updatedRepcodes.rep, ll0); + ZSTD_updateRep(updatedRepcodes.rep, offBase, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid, + "Not enough memory allocated. 
Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offBase, matchLength); + ip += matchLength + litLength; + if (!finalMatchSplit) + idx++; /* Next Sequence */ + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) +{ + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Discover the size of next block by searching for the delimiter. + * Note that a block delimiter **must** exist in this mode, + * otherwise it's an input error. + * The block size retrieved will be later compared to ensure it remains within bounds */ +static size_t +blockSize_explicitDelimiter(const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos) +{ + int end = 0; + size_t blockSize = 0; + size_t spos = seqPos.idx; + DEBUGLOG(6, "blockSize_explicitDelimiter : seq %zu / %zu", spos, inSeqsSize); + assert(spos <= inSeqsSize); + while (spos < inSeqsSize) { + end = (inSeqs[spos].offset == 0); + blockSize += inSeqs[spos].litLength + inSeqs[spos].matchLength; + if (end) { + if (inSeqs[spos].matchLength != 0) + RETURN_ERROR(externalSequences_invalid, "delimiter format error : both matchlength and offset must be == 0"); + break; + } + spos++; + } + if (!end) + RETURN_ERROR(externalSequences_invalid, "Reached end of sequences without finding a block delimiter"); + return blockSize; +} + +/* More a "target" block size */ +static size_t blockSize_noDelimiter(size_t blockSize, size_t remaining) +{ + int const lastBlock = (remaining <= blockSize); + return lastBlock ? 
remaining : blockSize; +} + +static size_t determine_blockSize(ZSTD_sequenceFormat_e mode, + size_t blockSize, size_t remaining, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, ZSTD_sequencePosition seqPos) +{ + DEBUGLOG(6, "determine_blockSize : remainingSize = %zu", remaining); + if (mode == ZSTD_sf_noBlockDelimiters) + return blockSize_noDelimiter(blockSize, remaining); + { size_t const explicitBlockSize = blockSize_explicitDelimiter(inSeqs, inSeqsSize, seqPos); + FORWARD_IF_ERROR(explicitBlockSize, "Error while determining block size with explicit delimiters"); + if (explicitBlockSize > blockSize) + RETURN_ERROR(externalSequences_invalid, "sequences incorrectly define a too large block"); + if (explicitBlockSize > remaining) + RETURN_ERROR(externalSequences_invalid, "sequences define a frame longer than source"); + return explicitBlockSize; + } +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), + * otherwise a ZSTD error. + */ +static size_t +ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) +{ + size_t cSize = 0; + size_t remaining = srcSize; + ZSTD_sequencePosition seqPos = {0, 0, 0}; + + BYTE const* ip = (BYTE const*)src; + BYTE* op = (BYTE*)dst; + ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + + DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); + /* Special case: empty frame */ + if (remaining == 0) { + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + cSize += ZSTD_blockHeaderSize; + } + + while (remaining) { + size_t compressedSeqsSize; + size_t cBlockSize; + size_t additionalByteAdjustment; + size_t blockSize = determine_blockSize(cctx->appliedParams.blockDelimiters, + cctx->blockSize, remaining, + inSeqs, inSeqsSize, seqPos); + U32 const lastBlock = (blockSize == remaining); + FORWARD_IF_ERROR(blockSize, "Error while trying to determine block size"); + assert(blockSize <= remaining); + ZSTD_resetSeqStore(&cctx->seqStore); + DEBUGLOG(5, "Working on new block. Blocksize: %zu (total:%zu)", blockSize, (ip - (const BYTE*)src) + blockSize); + + additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize, cctx->appliedParams.searchForExternalRepcodes); + FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); + blockSize -= additionalByteAdjustment; + + /* If blocks are too small, emit as a nocompress block */ + /* TODO: See 3090. We reduced MIN_CBLOCK_SIZE from 3 to 2 so to compensate we are adding + * additional 1. 
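(Editorial note, not part of upstream zstd: with MIN_CBLOCK_SIZE at 2 and the 3-byte block header, the threshold below works out to 2 + 3 + 1 + 1 == 7, i.e. any block shorter than 7 bytes is emitted as an uncompressed block.)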
We need to revisit and change this logic to be more consistent */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + cSize += cBlockSize; + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + continue; + } + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "not enough dstCapacity to write a new compressed block"); + compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, + &cctx->appliedParams, + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, + blockSize, + cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + cctx->bmi2); + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + DEBUGLOG(5, "Compressed sequences size: %zu", compressedSeqsSize); + + if (!cctx->isFirstBlock && + ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_isRLE(ip, blockSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + compressedSeqsSize = 1; + } + + if (compressedSeqsSize == 0) { + /* ZSTD_noCompressBlock writes the block header as well */ + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "ZSTD_noCompressBlock failed"); + DEBUGLOG(5, "Writing out nocompress block, size: %zu", cBlockSize); + } else if (compressedSeqsSize == 1) { + cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "ZSTD_rleCompressBlock failed"); + DEBUGLOG(5, "Writing out RLE block, size: %zu", cBlockSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + /* Write block header into beginning of block*/ + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); + MEM_writeLE24(op, cBlockHeader); + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; + DEBUGLOG(5, "Writing out compressed block, size: %zu", cBlockSize); + } + + cSize += cBlockSize; + + if (lastBlock) { + break; + } else { + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + cctx->isFirstBlock = 0; + } + DEBUGLOG(5, "cSize running total: %zu (remaining dstCapacity=%zu)", cSize, dstCapacity); + } + + DEBUGLOG(4, "cSize final total: %zu", cSize); + return cSize; +} + +size_t ZSTD_compressSequences(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) +{ + BYTE* op = (BYTE*)dst; + size_t cSize = 0; + size_t compressedBlocksSize = 0; + size_t frameHeaderSize = 0; + + /* Transparent initialization stage, same as compressStream2() */ + DEBUGLOG(4, "ZSTD_compressSequences (dstCapacity=%zu)", dstCapacity); + assert(cctx != NULL); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, 
ZSTD_e_end, srcSize), "CCtx initialization failed"); + /* Begin writing output, starting with frame header */ + frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); + op += frameHeaderSize; + dstCapacity -= frameHeaderSize; + cSize += frameHeaderSize; + if (cctx->appliedParams.fParams.checksumFlag && srcSize) { + XXH64_update(&cctx->xxhState, src, srcSize); + } + /* cSize includes block header size and compressed sequences size */ + compressedBlocksSize = ZSTD_compressSequences_internal(cctx, + op, dstCapacity, + inSeqs, inSeqsSize, + src, srcSize); + FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); + cSize += compressedBlocksSize; + dstCapacity -= compressedBlocksSize; + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); + MEM_writeLE32((char*)dst + cSize, checksum); + cSize += 4; + } + + DEBUGLOG(4, "Final compressed size: %zu", cSize); + return cSize; +} + +/*====== Finalize ======*/ + +static ZSTD_inBuffer inBuffer_forEndFlush(const ZSTD_CStream* zcs) +{ + const ZSTD_inBuffer nullInput = { NULL, 0, 0 }; + const int stableInput = (zcs->appliedParams.inBufferMode == ZSTD_bm_stable); + return stableInput ? zcs->expectedInBuffer : nullInput; +} + +/*! ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); + input.size = input.pos; /* do not ingest more input during flush */ + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = inBuffer_forEndFlush(zcs); + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR(remainingToFlush , "ZSTD_compressStream2(,,ZSTD_e_end) failed"); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 
0 : zcs->appliedParams.fParams.checksumFlag * 4); + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ +#include "clevels.h" + +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } +int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) + && (cParams->strategy <= ZSTD_lazy2) + && (cParams->hashLog > cParams->chainLog) + && (cParams->chainLog <= 24); +} + +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + if (cParams->hashLog < ZSTD_HASHLOG_MIN) { + cParams->hashLog = ZSTD_HASHLOG_MIN; + } + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + +static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } + { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + } +} + +/*! ZSTD_getCParams_internal() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. 
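(Editorial usage sketch, not part of upstream zstd, with `level` a placeholder: a caller sizing tables for a known 4 KB one-shot input would call ZSTD_getCParams_internal(level, 4096, 0, ZSTD_cpm_unknown), while a caller that genuinely does not know the size must pass ZSTD_CONTENTSIZE_UNKNOWN, since a literal 0 here really means a 0-byte source.)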
*/
+static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode)
+{
+    U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode);
+    U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB);
+    int row;
+    DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel);
+
+    /* row */
+    if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT;   /* 0 == default */
+    else if (compressionLevel < 0) row = 0;   /* entry 0 is baseline for fast mode */
+    else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL;
+    else row = compressionLevel;
+
+    {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
+        /* acceleration factor */
+        if (compressionLevel < 0) {
+            int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
+            cp.targetLength = (unsigned)(-clampedCompressionLevel);
+        }
+        /* refine parameters based on srcSize & dictSize */
+        return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode, ZSTD_ps_auto);
+    }
+}
+
+/*! ZSTD_getCParams() :
+ * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize.
+ *  Size values are optional, provide 0 if not known or unused */
+ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize)
+{
+    if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN;
+    return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown);
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
+ *  Fields of `ZSTD_frameParameters` are set to default values */
+static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) {
+    ZSTD_parameters params;
+    ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode);
+    DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel);
+    ZSTD_memset(&params, 0, sizeof(params));
+    params.cParams = cParams;
+    params.fParams.contentSizeFlag = 1;
+    return params;
+}
+
+/*! ZSTD_getParams() :
+ *  same idea as ZSTD_getCParams()
+ * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`).
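+ *  Editorial note, not part of upstream zstd: "default values" here means the
+ *  frame parameters are zeroed except contentSizeFlag, which
+ *  ZSTD_getParams_internal() above sets to 1.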
+ * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} + +void ZSTD_registerSequenceProducer( + ZSTD_CCtx* zc, + void* extSeqProdState, + ZSTD_sequenceProducer_F extSeqProdFunc +) { + assert(zc != NULL); + ZSTD_CCtxParams_registerSequenceProducer( + &zc->requestedParams, extSeqProdState, extSeqProdFunc + ); +} + +void ZSTD_CCtxParams_registerSequenceProducer( + ZSTD_CCtx_params* params, + void* extSeqProdState, + ZSTD_sequenceProducer_F extSeqProdFunc +) { + assert(params != NULL); + if (extSeqProdFunc != NULL) { + params->extSeqProdFunc = extSeqProdFunc; + params->extSeqProdState = extSeqProdState; + } else { + params->extSeqProdFunc = NULL; + params->extSeqProdState = NULL; + } +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_internal.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_internal.h new file mode 100644 index 0000000..e41d7b7 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_internal.h @@ -0,0 +1,1534 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/zstd_internal.h" +#include "zstd_cwksp.h" +#ifdef ZSTD_MULTITHREAD +# include "zstdmt_compress.h" +#endif +#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_NbCommonBytes */ + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy. 
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +/*********************************************** +* Entropy buffer statistics structs and funcs * +***********************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. + * @return : 0 on success or error code */ +size_t ZSTD_buildBlockEntropyStats( + const seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); + +/********************************* +* Compression internals structs * +*********************************/ + +typedef struct { + U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */ + U32 len; /* Raw length of match */ +} ZSTD_match_t; + +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. 
pos <= size. */ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; + +typedef struct { + int price; /* price from beginning of segment to this position */ + U32 off; /* offset of previous match */ + U32 mlen; /* length of previous match */ + U32 litlen; /* nb of literals since previous match */ + U32 rep[ZSTD_REP_NUM]; /* offset history after previous match */ +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3) +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_SIZE */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_paramSwitch_e literalCompressionMode; +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ + U32 nbOverflowCorrections; /* Number of times overflow correction has run since + * ZSTD_window_init(). Useful for debugging coredumps + * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. + */ +} ZSTD_window_t; + +#define ZSTD_WINDOW_START_INDEX 2 + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; + +#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ + +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. 
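+ * (Editorial example, not part of upstream zstd: a 4 KB dictionary whose
+ * content is copied into the context starts with loadedDictEnd == 4096,
+ * and keeps that value until the window slides past the dictionary.)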
+ * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. + */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + + U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ + BYTE* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + U64 hashSalt; /* For row-based matchFinder: salts the hash for reuse of tag table */ + U32 hashSaltEntropy; /* For row-based matchFinder: collects entropy for salt generation */ + + U32* hashTable; + U32* hashTable3; + U32* chainTable; + + U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ + + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. + */ + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t* dictMatchState; + ZSTD_compressionParameters cParams; + const rawSeqStore_t* ldmSeqStore; + + /* Controls prefetching in some dictMatchState matchfinders. + * This behavior is controlled from the cctx ms. + * This parameter has no effect in the cdict ms. */ + int prefetchCDictTables; + + /* When == 0, lazy match finders insert every position. + * When != 0, lazy match finders only insert positions they search. + * This allows them to skip much faster over incompressible data, + * at a small cost to compression ratio. + */ + int lazySkipping; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + U32 loadedDictEnd; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; +} ldmState_t; + +typedef struct { + ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? 
ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t +ZSTD_noCompressBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + DEBUGLOG(5, "ZSTD_noCompressBlock (srcSize=%zu, dstCapacity=%zu)", srcSize, dstCapacity); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + +MEM_STATIC size_t +ZSTD_rleCompressBlock(void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +{ + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} + + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, (int)strat)); + return (srcSize >> minlog) + 2; +} + +MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_ps_enable: + return 0; + case ZSTD_ps_disable: + return 1; + default: + assert(0 /* impossible: pre-validated */); + ZSTD_FALLTHROUGH; + case ZSTD_ps_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + +/*! ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. 
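+ *  Editorial worked example, not part of upstream zstd: with 40 bytes left to
+ *  copy and ip sitting 20 bytes before ilimit_w, the first 20 bytes go through
+ *  ZSTD_wildcopy() (which is allowed to run past its exact end by up to
+ *  WILDCOPY_OVERLENGTH bytes), and the trailing 20 bytes are copied one at a time.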
+ */ +static void +ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) +{ + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} + + +#define REPCODE1_TO_OFFBASE REPCODE_TO_OFFBASE(1) +#define REPCODE2_TO_OFFBASE REPCODE_TO_OFFBASE(2) +#define REPCODE3_TO_OFFBASE REPCODE_TO_OFFBASE(3) +#define REPCODE_TO_OFFBASE(r) (assert((r)>=1), assert((r)<=ZSTD_REP_NUM), (r)) /* accepts IDs 1,2,3 */ +#define OFFSET_TO_OFFBASE(o) (assert((o)>0), o + ZSTD_REP_NUM) +#define OFFBASE_IS_OFFSET(o) ((o) > ZSTD_REP_NUM) +#define OFFBASE_IS_REPCODE(o) ( 1 <= (o) && (o) <= ZSTD_REP_NUM) +#define OFFBASE_TO_OFFSET(o) (assert(OFFBASE_IS_OFFSET(o)), (o) - ZSTD_REP_NUM) +#define OFFBASE_TO_REPCODE(o) (assert(OFFBASE_IS_REPCODE(o)), (o)) /* returns ID 1,2,3 */ + +/*! ZSTD_storeSeq() : + * Store a sequence (litlen, litPtr, offBase and matchLength) into seqStore_t. + * @offBase : Users should employ macros REPCODE_TO_OFFBASE() and OFFSET_TO_OFFBASE(). + * @matchLength : must be >= MINMATCH + * Allowed to over-read literals up to litLimit. +*/ +HINT_INLINE UNUSED_ATTR void +ZSTD_storeSeq(seqStore_t* seqStorePtr, + size_t litLength, const BYTE* literals, const BYTE* litLimit, + U32 offBase, + size_t matchLength) +{ + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offBase%7u", + pos, (U32)litLength, (U32)matchLength, (U32)offBase); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. 
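+         * Editorial note, not part of upstream zstd: e.g. a 10-byte literal run
+         * costs the single ZSTD_copy16() below, while a 40-byte run costs that
+         * copy plus one ZSTD_wildcopy() of the remaining 24 bytes.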
+ */ + ZSTD_STATIC_ASSERT(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_literalLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offBase = offBase; + + /* match Length */ + assert(matchLength >= MINMATCH); + { size_t const mlBase = matchLength - MINMATCH; + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_matchLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].mlBase = (U16)mlBase; + } + + seqStorePtr->sequences++; +} + +/* ZSTD_updateRep() : + * updates in-place @rep (array of repeat offsets) + * @offBase : sum-type, using numeric representation of ZSTD_storeSeq() + */ +MEM_STATIC void +ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0) +{ + if (OFFBASE_IS_OFFSET(offBase)) { /* full offset */ + rep[2] = rep[1]; + rep[1] = rep[0]; + rep[0] = OFFBASE_TO_OFFSET(offBase); + } else { /* repcode */ + U32 const repCode = OFFBASE_TO_REPCODE(offBase) - 1 + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + rep[2] = (repCode >= 2) ? 
rep[1] : rep[2];
+            rep[1] = rep[0];
+            rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            /* nothing to do */
+        }
+    }
+}
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t
+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase, U32 const ll0)
+{
+    repcodes_t newReps;
+    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+    ZSTD_updateRep(newReps.rep, offBase, ll0);
+    return newReps;
+}
+
+
+/*-*************************************
+*  Match length counter
+***************************************/
+MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
+{
+    const BYTE* const pStart = pIn;
+    const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
+
+    if (pIn < pInLoopLimit) {
+        { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+          if (diff) return ZSTD_NbCommonBytes(diff); }
+        pIn+=sizeof(size_t); pMatch+=sizeof(size_t);
+        while (pIn < pInLoopLimit) {
+            size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+            if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
+            pIn += ZSTD_NbCommonBytes(diff);
+            return (size_t)(pIn - pStart);
+    }   }
+    if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
+    if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
+    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
+    return (size_t)(pIn - pStart);
+}
+
+
+/*-*************************************
+*  Hashes
+***************************************/
+static const U32 prime3bytes = 506832829U;
+static U32 ZSTD_hash3(U32 u, U32 h, U32 s) { assert(h <= 32); return (((u << (32-24)) * prime3bytes) ^ s) >> (32-h) ; }
+MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h, 0); } /* only in zstd_opt.h */
+MEM_STATIC size_t ZSTD_hash3PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash3(MEM_readLE32(ptr), h, s); }
+
+static const U32 prime4bytes = 2654435761U;
+static U32 ZSTD_hash4(U32 u, U32 h, U32 s) { assert(h <= 32); return ((u * prime4bytes) ^ s) >> (32-h) ; }
+static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_readLE32(ptr), h, 0); }
+static size_t ZSTD_hash4PtrS(const void* ptr, U32 h, U32 s) { return ZSTD_hash4(MEM_readLE32(ptr), h, s); }
+
+static const U64 prime5bytes = 889523592379ULL;
+static size_t ZSTD_hash5(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-40)) * prime5bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash5PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash5(MEM_readLE64(p), h, s); }
+
+static const U64 prime6bytes = 227718039650203ULL;
+static size_t ZSTD_hash6(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-48)) * prime6bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash6PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash6(MEM_readLE64(p), h, s); }
+
+static const U64 prime7bytes = 58295818150454627ULL;
+static size_t ZSTD_hash7(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u << (64-56)) * prime7bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash7PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash7(MEM_readLE64(p), h, s); }
+
+static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
+static size_t ZSTD_hash8(U64 u, U32 h, U64 s) { assert(h <= 64); return (size_t)((((u) * prime8bytes) ^ s) >> (64-h)) ; }
+static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h, 0); }
+static size_t ZSTD_hash8PtrS(const void* p, U32 h, U64 s) { return ZSTD_hash8(MEM_readLE64(p), h, s); }
+
+
+MEM_STATIC
FORCE_INLINE_ATTR +size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + /* Although some of these hashes do support hBits up to 64, some do not. + * To be on the safe side, always avoid hBits > 32. */ + assert(hBits <= 32); + + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +MEM_STATIC FORCE_INLINE_ATTR +size_t ZSTD_hashPtrSalted(const void* p, U32 hBits, U32 mls, const U64 hashSalt) { + /* Although some of these hashes do support hBits up to 64, some do not. + * To be on the safe side, always avoid hBits > 32. */ + assert(hBits <= 32); + + switch(mls) + { + default: + case 4: return ZSTD_hash4PtrS(p, hBits, (U32)hashSalt); + case 5: return ZSTD_hash5PtrS(p, hBits, hashSalt); + case 6: return ZSTD_hash6PtrS(p, hBits, hashSalt); + case 7: return ZSTD_hash7PtrS(p, hBits, hashSalt); + case 8: return ZSTD_hash8PtrS(p, hBits, hashSalt); + } +} + + +/** ZSTD_ipow() : + * Return base^exponent. + */ +static U64 ZSTD_ipow(U64 base, U64 exponent) +{ + U64 power = 1; + while (exponent) { + if (exponent & 1) power *= base; + exponent >>= 1; + base *= base; + } + return power; +} + +#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 + +/** ZSTD_rollingHash_append() : + * Add the buffer to the hash value. + */ +static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) +{ + BYTE const* istart = (BYTE const*)buf; + size_t pos; + for (pos = 0; pos < size; ++pos) { + hash *= prime8bytes; + hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; + } + return hash; +} + +/** ZSTD_rollingHash_compute() : + * Compute the rolling hash value of the buffer. + */ +MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) +{ + return ZSTD_rollingHash_append(0, buf, size); +} + +/** ZSTD_rollingHash_primePower() : + * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash + * over a window of length bytes. + */ +MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) +{ + return ZSTD_ipow(prime8bytes, length - 1); +} + +/** ZSTD_rollingHash_rotate() : + * Rotate the rolling hash by one byte. + */ +MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) +{ + hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; + hash *= prime8bytes; + hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; + return hash; +} + +/*-************************************* +* Round buffer management +***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +#endif +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. 
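+ * Editorial note, not part of upstream zstd: "empty" means lowLimit and
+ * dictLimit are both moved up to the current end index, so no byte of history
+ * remains addressable; the indices themselves are not rewound.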
+ */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) +{ + return window.dictLimit == ZSTD_WINDOW_START_INDEX && + window.lowLimit == ZSTD_WINDOW_START_INDEX && + (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. + */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : + ZSTD_noDict; +} + +/* Defining this macro to non-zero tells zstd to run the overflow correction + * code much more frequently. This is very inefficient, and should only be + * used for tests and fuzzers. + */ +#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY +# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 +# else +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 +# endif +#endif + +/** + * ZSTD_window_canOverflowCorrect(): + * Returns non-zero if the indices are large enough for overflow correction + * to work correctly without impacting compression ratio. + */ +MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src) +{ + U32 const cycleSize = 1u << cycleLog; + U32 const curr = (U32)((BYTE const*)src - window.base); + U32 const minIndexToOverflowCorrect = cycleSize + + MAX(maxDist, cycleSize) + + ZSTD_WINDOW_START_INDEX; + + /* Adjust the min index to backoff the overflow correction frequency, + * so we don't waste too much CPU in overflow correction. If this + * computation overflows we don't really care, we just need to make + * sure it is at least minIndexToOverflowCorrect. + */ + U32 const adjustment = window.nbOverflowCorrections + 1; + U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, + minIndexToOverflowCorrect); + U32 const indexLargeEnough = curr > adjustedIndex; + + /* Only overflow correct early if the dictionary is invalidated already, + * so we don't hurt compression ratio. + */ + U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; + + return indexLargeEnough && dictionaryInvalidated; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src, + void const* srcEnd) +{ + U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { + return 1; + } + } + return curr > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. 
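+ * Editorial worked example, not part of upstream zstd: with cycleLog == 20,
+ * maxDist == 1<<20 and curr == 0xE0000123, currentCycle is 0x123, newCurrent
+ * becomes 0x123 + (1<<20) == 0x100123, and the correction returned is
+ * 0xE0000123 - 0x100123 == 0xDFF00000.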
+ * Returns the correction made to the indices, which must be applied to every
+ * stored index.
+ *
+ * The least significant cycleLog bits of the indices must remain the same,
+ * which may be 0. Every index up to maxDist in the past must be valid.
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+                                U32 maxDist, void const* src)
+{
+    /* preemptive overflow correction:
+     * 1. correction is large enough:
+     *    lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog - blockSize
+     *    1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog
+     *
+     *    current - newCurrent
+     *    > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog)
+     *    > (3<<29) - (1<<chainLog)
+     *    > (3<<29) - (1<<30)             (NOTE: chainLog <= 30)
+     *    > 1<<29
+     *
+     * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow:
+     *    After correction, current is less than (1<<chainLog + 1<<windowLog).
+     *    In 64-bit mode we are safe, because we have 64-bit ptrdiff_t.
+     *    In 32-bit mode we are safe, because (chainLog <= 29), so
+     *    ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32.
+     * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
+     *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
+     */
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const cycleMask = cycleSize - 1;
+    U32 const curr = (U32)((BYTE const*)src - window->base);
+    U32 const currentCycle = curr & cycleMask;
+    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
+    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
+                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
+                                     : 0;
+    U32 const newCurrent = currentCycle
+                         + currentCycleCorrection
+                         + MAX(maxDist, cycleSize);
+    U32 const correction = curr - newCurrent;
+    /* maxDist must be a power of two so that:
+     *   (newCurrent & cycleMask) == (curr & cycleMask)
+     * This is required to not corrupt the chains / binary tree.
+     */
+    assert((maxDist & (maxDist - 1)) == 0);
+    assert((curr & cycleMask) == (newCurrent & cycleMask));
+    assert(curr > newCurrent);
+    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        /* Loose bound, should be around 1<<29 (see above) */
+        assert(correction > 1<<28);
+    }
+
+    window->base += correction;
+    window->dictBase += correction;
+    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
+        window->lowLimit = ZSTD_WINDOW_START_INDEX;
+    } else {
+        window->lowLimit -= correction;
+    }
+    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
+        window->dictLimit = ZSTD_WINDOW_START_INDEX;
+    } else {
+        window->dictLimit -= correction;
+    }
+
+    /* Ensure we can still reference the full window. */
+    assert(newCurrent >= maxDist);
+    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
+    /* Ensure that lowLimit and dictLimit didn't underflow. */
+    assert(window->lowLimit <= newCurrent);
+    assert(window->dictLimit <= newCurrent);
+
+    ++window->nbOverflowCorrections;
+
+    DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
+             window->lowLimit);
+    return correction;
+}
+
+/**
+ * ZSTD_window_enforceMaxDist():
+ * Updates lowLimit so that:
+ *    (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
+ *
+ * It ensures index is valid as long as index >= lowLimit.
+ * This must be called before a block compression call.
+ *
+ * loadedDictEnd is only defined if a dictionary is in use for current compression.
+ * As the name implies, loadedDictEnd represents the index at end of dictionary.
+ * The value lies within context's referential, it can be directly compared to blockEndIdx.
+ *
+ * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0.
+ * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit.
+ * This is because dictionaries are allowed to be referenced fully
+ * as long as the last byte of the dictionary is in the window.
+ * Once input has progressed beyond window size, dictionary cannot be referenced anymore.
+ *
+ * In normal dict mode, the dictionary lies between lowLimit and dictLimit.
+ * In dictMatchState mode, lowLimit and dictLimit are the same,
+ * and the dictionary is below them.
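+ * (Editorial worked example, not part of upstream zstd: with maxDist == 1<<17,
+ * no dictionary, and blockEndIdx == 200000, the test 200000 > 131072 + 0
+ * passes, so lowLimit is raised to 200000 - 131072 == 68928.)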
+ * forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void +ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + /* - When there is no dictionary : loadedDictEnd == 0. + In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. + - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. + In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ + if (blockEndIdx > maxDist + loadedDictEnd) { + U32 const newLowLimit = blockEndIdx - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + (unsigned)window->dictLimit, (unsigned)window->lowLimit); + window->dictLimit = window->lowLimit; + } + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} + +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. + * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ +MEM_STATIC void +ZSTD_checkDictValidity(const ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); + + if (blockEndIdx > loadedDictEnd + maxDist || loadedDictEnd != window->dictLimit) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + * + * We also have to invalidate the dictionary if ZSTD_window_update() has detected + * non-contiguous segments, which means that loadedDictEnd != window->dictLimit. + * loadedDictEnd may be 0, if forceWindow is true, but in that case we never use + * dictMatchState, so setting it to NULL is not a problem. 
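+         * Editorial example, not part of upstream zstd: with maxDist == 1<<20 and
+         * loadedDictEnd == 4096, the dictionary stays usable until blockEndIdx
+         * exceeds 4096 + (1<<20); crossing that bound anywhere within the next
+         * block invalidates it for that whole block.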
+ */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); + *loadedDictEndPtr = 0; + *dictMatchStatePtr = NULL; + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } +} + +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + ZSTD_memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)" "; + window->dictBase = (BYTE const*)" "; + ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */ + window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */ + window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */ + window->nbOverflowCorrections = 0; +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. + */ +MEM_STATIC +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize, + int forceNonContiguous) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); + /* Check if blocks follow each other */ + if (src != window->nextSrc || forceNonContiguous) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + /* ms->nextToUpdate = window->dictLimit; */ + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} + +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary + * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't + * valid for the entire block. 
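+ * (Editorial example: with windowLog = 20, maxDistance = 1048576; for
+ * curr = 10000000 and lowestValid = 1000, curr - lowestValid > maxDistance,
+ * so withinWindow = curr - maxDistance = 8951424; if a dictionary is still
+ * valid, lowestValid is returned instead.)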
So this check is sufficient to find the lowest valid match index. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When computing the lowest prefix index we need to take the dictionary into account to handle + * the edge case where the dictionary and the source are contiguous in memory. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + + + +/* debug functions */ +#if (DEBUGLEVEL>=2) + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#endif + +/* Short Cache */ + +/* Normally, zstd matchfinders follow this flow: + * 1. Compute hash at ip + * 2. Load index from hashTable[hash] + * 3. Check if *ip == *(base + index) + * In dictionary compression, loading *(base + index) is often an L2 or even L3 miss. + * + * Short cache is an optimization which allows us to avoid step 3 most of the time + * when the data doesn't actually match. With short cache, the flow becomes: + * 1. Compute (hash, currentTag) at ip. currentTag is an 8-bit independent hash at ip. + * 2. Load (index, matchTag) from hashTable[hash]. See ZSTD_writeTaggedIndex to understand how this works. + * 3. Only if currentTag == matchTag, check *ip == *(base + index). Otherwise, continue. + * + * Currently, short cache is only implemented in CDict hashtables. Thus, its use is limited to + * dictMatchState matchfinders. + */ +#define ZSTD_SHORT_CACHE_TAG_BITS 8 +#define ZSTD_SHORT_CACHE_TAG_MASK ((1u << ZSTD_SHORT_CACHE_TAG_BITS) - 1) + +/* Helper function for ZSTD_fillHashTable and ZSTD_fillDoubleHashTable. + * Unpacks hashAndTag into (hash, tag), then packs (index, tag) into hashTable[hash]. */ +MEM_STATIC void ZSTD_writeTaggedIndex(U32* const hashTable, size_t hashAndTag, U32 index) { + size_t const hash = hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; + U32 const tag = (U32)(hashAndTag & ZSTD_SHORT_CACHE_TAG_MASK); + assert(index >> (32 - ZSTD_SHORT_CACHE_TAG_BITS) == 0); + hashTable[hash] = (index << ZSTD_SHORT_CACHE_TAG_BITS) | tag; +} + +/* Helper function for short cache matchfinders. + * Unpacks tag1 and tag2 from lower bits of packedTag1 and packedTag2, then checks if the tags match. 
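+ * (Editorial example: with ZSTD_SHORT_CACHE_TAG_BITS == 8, packedTag1 = 0x123456AB
+ * and packedTag2 = 0x9876FFAB both carry tag 0xAB in their low 8 bits, so the
+ * tags match and the matchfinder proceeds to the full byte comparison.)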
 */
+MEM_STATIC int ZSTD_comparePackedTags(size_t packedTag1, size_t packedTag2) {
+    U32 const tag1 = packedTag1 & ZSTD_SHORT_CACHE_TAG_MASK;
+    U32 const tag2 = packedTag2 & ZSTD_SHORT_CACHE_TAG_MASK;
+    return tag1 == tag2;
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+/* ===============================================================
+ * Shared internal declarations
+ * These prototypes may be called from sources not in lib/compress
+ * =============================================================== */
+
+/* ZSTD_loadCEntropy() :
+ * dict : must point at beginning of a valid zstd dictionary.
+ * return : size of dictionary header (size of magic number + dict ID + entropy tables)
+ * assumptions : magic number supposed already checked
+ *               and dictSize >= 8 */
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize);
+
+void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs);
+
+/* ==============================================================
+ * Private declarations
+ * These prototypes shall only be called from within lib/compress
+ * ============================================================== */
+
+/* ZSTD_getCParamsFromCCtxParams() :
+ * cParams are built depending on compressionLevel, src size hints,
+ * LDM and manually set compression parameters.
+ * Note: srcSizeHint == 0 means 0!
+ */
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode);
+
+/*! ZSTD_initCStream_internal() :
+ *  Private use only. Init streaming operation.
+ *  expects params to be valid.
+ *  must receive dict, or cdict, or none, but not both.
+ *  @return : 0, or an error code */
+size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs,
+                     const void* dict, size_t dictSize,
+                     const ZSTD_CDict* cdict,
+                     const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize);
+
+void ZSTD_resetSeqStore(seqStore_t* ssPtr);
+
+/*! ZSTD_getCParamsFromCDict() :
+ *  as the name implies */
+ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict);
+
+/* ZSTD_compressBegin_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
+                                    const void* dict, size_t dictSize,
+                                    ZSTD_dictContentType_e dictContentType,
+                                    ZSTD_dictTableLoadMethod_e dtlm,
+                                    const ZSTD_CDict* cdict,
+                                    const ZSTD_CCtx_params* params,
+                                    unsigned long long pledgedSrcSize);
+
+/* ZSTD_compress_advanced_internal() :
+ * Private use only. To be called from zstdmt_compress.c. */
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize,
+                                       const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+/** ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
+/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
+ * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
+ * Note that the block delimiter must include the last literals of the block.
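+ * (Editorial illustration of the ZSTD_Sequence convention from zstd.h: a block
+ * made of 6 literals, a 10-byte match at offset 1, and 2 trailing literals can
+ * be described as
+ *     { offset=1, litLength=6, matchLength=10 }, { offset=0, litLength=2, matchLength=0 }
+ * where the final entry, with offset == 0 and matchLength == 0, is the block
+ * delimiter and carries the last 2 literals of the block.)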
+ */ +size_t +ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, + ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); + +/* Returns the number of bytes to move the current read position back by. + * Only non-zero if we ended up splitting a sequence. + * Otherwise, it may return a ZSTD error if something went wrong. + * + * This function will attempt to scan through blockSize bytes + * represented by the sequences in @inSeqs, + * storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. + */ +size_t +ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch); + +/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */ +MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) { + return params->extSeqProdFunc != NULL; +} + +/* =============================================================== + * Deprecated definitions that are still used internally to avoid + * deprecation warnings. These functions are exactly equivalent to + * their public variants, but avoid the deprecation warnings. + * =============================================================== */ + +size_t ZSTD_compressBegin_usingCDict_deprecated(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +size_t ZSTD_compressContinue_public(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +size_t ZSTD_compressEnd_public(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +size_t ZSTD_compressBlock_deprecated(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +#endif /* ZSTD_COMPRESS_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.c new file mode 100644 index 0000000..bfd4f11 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.c @@ -0,0 +1,235 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+ /*-*************************************
+ *  Dependencies
+ ***************************************/
+#include "zstd_compress_literals.h"
+
+
+/* **************************************************************
+*  Debug Traces
+****************************************************************/
+#if DEBUGLEVEL >= 2
+
+static size_t showHexa(const void* src, size_t srcSize)
+{
+    const BYTE* const ip = (const BYTE*)src;
+    size_t u;
+    for (u=0; u<srcSize; u++) {
+        RAWLOG(5, " %02X", ip[u]); (void)ip;
+    }
+    RAWLOG(5, " \n");
+    return srcSize;
+}
+
+#endif
+
+
+/* **************************************************************
+*  Literals compression - special cases
+****************************************************************/
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
+
+static int allBytesIdentical(const void* src, size_t srcSize)
+{
+    assert(srcSize >= 1);
+    assert(src != NULL);
+    {   const BYTE b = ((const BYTE*)src)[0];
+        size_t p;
+        for (p=1; p<srcSize; p++) {
+            if (((const BYTE*)src)[p] != b) return 0;
+        }
+        return 1;
+    }
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    assert(dstCapacity >= 4); (void)dstCapacity;
+    assert(allBytesIdentical(src, srcSize));
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+/* ZSTD_minLiteralsToCompress() :
+ * returns minimal amount of literals
+ * for literal compression to even be attempted.
+ * Minimum is made tighter as compression strategy increases.
+ */
+static size_t
+ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
+{
+    assert((int)strategy >= 0);
+    assert((int)strategy <= 9);
+    /* btultra2 : min 8 bytes;
+     * then 2x larger for each successive compression strategy
+     * max threshold 64 bytes */
+    {   int const shift = MIN(9-(int)strategy, 3);
+        size_t const mintc = (huf_repeat == HUF_repeat_valid) ?
6 : (size_t)8 << shift; + DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc); + return mintc; + } +} + +size_t ZSTD_compressLiterals ( + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, + int disableLiteralCompression, + int suspectUncompressible, + int bmi2) +{ + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)", + disableLiteralCompression, (U32)srcSize, dstCapacity); + + DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize)); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* if too small, don't even attempt compression (speed opt) */ + if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode)) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const flags = 0 + | (bmi2 ? HUF_flags_bmi2 : 0) + | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0) + | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0) + | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0); + + typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int); + huf_compress_f huf_compress; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat; + cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize, + src, srcSize, + HUF_SYMBOLVALUE_MAX, LitHufLog, + entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, + &repeat, flags); + DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "reusing statistics from previous huffman block"); + hType = set_repeat; + } + } + + { size_t const minGain = ZSTD_minGain(srcSize, strategy); + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } } + if (cLitSize==1) { + /* A return value of 1 signals that the alphabet consists of a single symbol. + * However, in some rare circumstances, it could be the compressed size (a single byte). + * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`. + * (it's also necessary to not generate statistics). + * Therefore, in such a case, actively check that all bytes are identical. 
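+     * (Editorial gloss: per the reasoning above, a genuine 1-byte compressed
+     *  output is only possible when srcSize < 8; for srcSize >= 8, a return
+     *  value of 1 can only mean a single-symbol alphabet, so RLE is safe
+     *  without the explicit scan below.)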
*/ + if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); + { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS); + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.h new file mode 100644 index 0000000..b060c8a --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_literals.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ + + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* ZSTD_compressRleLiteralsBlock() : + * Conditions : + * - All bytes in @src are identical + * - dstCapacity >= 4 */ +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* ZSTD_compressLiterals(): + * @entropyWorkspace: must be aligned on 4-bytes boundaries + * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE + * @suspectUncompressible: sampling checks, to potentially skip huffman coding + */ +size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + int suspectUncompressible, + int bmi2); + +#endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.c new file mode 100644 index 0000000..8872d4d --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_sequences.h" + +/** + * -log2(x / 256) lookup table for x in [0, 256). + * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * compressibility. + */ + return nbSeq >= 2048; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. 
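+ * (Editorial worked example: for two symbols with count[] = {8, 8} and
+ * total = 16, each normalizes to 256*8/16 = 128, and
+ * kInverseProbabilityLog256[128] == 256, i.e. 1 bit per occurrence in
+ * 8.8 fixed point; the summed cost is (8*256 + 8*256) >> 8 = 16 bits.)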
+ */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + + assert(total > 0); + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. + */ +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or fewer symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. 
+ */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +typedef struct { + S16 norm[MaxSeq + 1]; + U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)]; +} ZSTD_BuildCTableWksp; + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, 
entropyWorkspaceSize), "");   /* note : could be pre-calculated */
+        return 0;
+    case set_compressed: {
+        ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace;
+        size_t nbSeq_1 = nbSeq;
+        const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
+        if (count[codeTable[nbSeq-1]] > 1) {
+            count[codeTable[nbSeq-1]]--;
+            nbSeq_1--;
+        }
+        assert(nbSeq_1 > 1);
+        assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
+        (void)entropyWorkspaceSize;
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
+        assert(oend >= op);
+        {   size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog);   /* overflow protected */
+            FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
+            return NCountSize;
+        }
+    }
+    default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach");
+    }
+}
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_encodeSequences_body(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i  (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    {   size_t n;
+        for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+            BYTE const llCode = llCodeTable[n];
+            BYTE const ofCode = ofCodeTable[n];
+            BYTE const mlCode = mlCodeTable[n];
+            U32  const llBits = LL_bits[llCode];
+            U32  const ofBits = ofCode;
+            U32  const mlBits = ML_bits[mlCode];
+            DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                        (unsigned)sequences[n].litLength,
+                        (unsigned)sequences[n].mlBase + MINMATCH,
+                        (unsigned)sequences[n].offBase);
+                                                                            /* 32b*/  /* 64b*/
+                                                                            /* (7)*/  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+            FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+            if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+            FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+            if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+                BIT_flushBits(&blockStream);                                /* (7)*/
+            BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+            if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+            BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
+            if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+            if (longOffsets) {
+                unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+                if (extraBits) {
+                    BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
+                    BIT_flushBits(&blockStream);                            /* (7)*/
+                }
+                BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
+                            ofBits - extraBits);                            /* 31 */
+            } else {
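+                /* Editorial note: this branch runs when longOffsets == 0, i.e.
+                 * windowLog <= STREAM_ACCUMULATOR_MIN, so all ofBits extra bits
+                 * of the offset are known to fit in the bit accumulator and can
+                 * be written with a single BIT_addBits() call. */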
BIT_addBits(&blockStream, sequences[n].offBase, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static BMI2_TARGET_ATTRIBUTE size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.h new file mode 100644 index 0000000..4a3a05d --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_sequences.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +#include "../common/fse.h" /* FSE_repeat, FSE_CTable */ +#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.c new file mode 100644 index 0000000..628a2dc --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.c @@ -0,0 +1,688 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_superblock.h" + +#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */ +#include "hist.h" /* HIST_countFast_wksp */ +#include "zstd_compress_internal.h" /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */ +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. + * + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. 
+ *  and the following sub-blocks' literals sections will be Treeless_Literals_Block
+ *  (reusing the table written by the first sub-block).
+ *  @return : compressed size of literals section of a sub-block
+ *            Or 0 if unable to compress.
+ *            Or error code */
+static size_t
+ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
+                              const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                              const BYTE* literals, size_t litSize,
+                              void* dst, size_t dstSize,
+                              const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    size_t const header = writeEntropy ? 200 : 0;
+    size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstSize;
+    BYTE* op = ostart + lhSize;
+    U32 const singleStream = lhSize == 3;
+    symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
+    size_t cLitSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
+
+    *entropyWritten = 0;
+    if (litSize == 0 || hufMetadata->hType == set_basic) {
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal");
+        return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+    } else if (hufMetadata->hType == set_rle) {
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal");
+        return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize);
+    }
+
+    assert(litSize > 0);
+    assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat);
+
+    if (writeEntropy && hufMetadata->hType == set_compressed) {
+        ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize);
+        op += hufMetadata->hufDesSize;
+        cLitSize += hufMetadata->hufDesSize;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
+    }
+
+    {   int const flags = bmi2 ? HUF_flags_bmi2 : 0;
+        const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
+                                          : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
+        op += cSize;
+        cLitSize += cSize;
+        if (cSize == 0 || ERR_isError(cSize)) {
+            DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize));
+            return 0;
+        }
+        /* If we expand and we aren't writing a header then emit uncompressed */
+        if (!writeEntropy && cLitSize >= litSize) {
+            DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        /* If we are writing headers then allow expansion that doesn't change our header size.
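+         * (Editorial gloss: lhSize was fixed from litSize before compression;
+         *  this guard re-checks that cLitSize still fits the chosen header
+         *  format. A 3-byte header stores sizes in 10 bits each, so it can only
+         *  record values below 1 KB; beyond that, fall back to raw literals.)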
 */
+        if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) {
+            assert(cLitSize > litSize);
+            DEBUGLOG(5, "Literals expanded beyond allowed header size");
+            return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize);
+        }
+        DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize);
+    }
+
+    /* Build header */
+    switch(lhSize)
+    {
+        case 3: /* 2 - 2 - 10 - 10 */
+            {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+                MEM_writeLE24(ostart, lhc);
+                break;
+            }
+        case 4: /* 2 - 2 - 14 - 14 */
+            {   U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18);
+                MEM_writeLE32(ostart, lhc);
+                break;
+            }
+        case 5: /* 2 - 2 - 18 - 18 */
+            {   U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22);
+                MEM_writeLE32(ostart, lhc);
+                ostart[4] = (BYTE)(cLitSize >> 10);
+                break;
+            }
+        default: /* not possible : lhSize is {3,4,5} */
+            assert(0);
+    }
+    *entropyWritten = 1;
+    DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
+    return (size_t)(op-ostart);
+}
+
+static size_t
+ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
+                         const seqDef* sequences, size_t nbSeqs,
+                         size_t litSize, int lastSubBlock)
+{
+    size_t matchLengthSum = 0;
+    size_t litLengthSum = 0;
+    size_t n;
+    for (n=0; n<nbSeqs; n++) {
+        const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
+        litLengthSum += seqLen.litLength;
+        matchLengthSum += seqLen.matchLength;
+    }
+    DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
+                (unsigned)nbSeqs, (const void*)sequences,
+                (unsigned)litLengthSum, (unsigned)matchLengthSum);
+    if (!lastSubBlock)
+        assert(litLengthSum == litSize);
+    else
+        assert(litLengthSum <= litSize);
+    (void)litLengthSum;
+    return matchLengthSum + litSize;
+}
+
+/** ZSTD_compressSubBlock_sequences() :
+ *  Compresses sequences section for a sub-block.
+ *  fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have
+ *  symbol compression modes for the super-block.
+ *  The first successfully compressed block will have these in its header.
+ *  We set entropyWritten=1 when we succeed in compressing the sequences.
+ *  The following sub-blocks will always have repeat mode.
+ *  @return : compressed size of sequences section of a sub-block
+ *            Or 0 if it is unable to compress
+ *            Or error code. */
+static size_t
+ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
+                                const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                const seqDef* sequences, size_t nbSeq,
+                                const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
+                                const ZSTD_CCtx_params* cctxParams,
+                                void* dst, size_t dstCapacity,
+                                const int bmi2, int writeEntropy, int* entropyWritten)
+{
+    const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + dstCapacity;
+    BYTE* op = ostart;
+    BYTE* seqHead;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets);
+
+    *entropyWritten = 0;
+    /* Sequences Header */
+    RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
+                    dstSize_tooSmall, "");
+    if (nbSeq < 128)
+        *op++ = (BYTE)nbSeq;
+    else if (nbSeq < LONGNBSEQ)
+        op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
+    else
+        op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
+    if (nbSeq==0) {
+        return (size_t)(op - ostart);
+    }
+
+    /* seqHead : flags for FSE encoding type */
+    seqHead = op++;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart));
+
+    if (writeEntropy) {
+        const U32 LLtype = fseMetadata->llType;
+        const U32 Offtype = fseMetadata->ofType;
+        const U32 MLtype = fseMetadata->mlType;
+        DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize);
+        *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+        ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize);
+        op += fseMetadata->fseTablesSize;
+    } else {
+        const U32 repeat = set_repeat;
+        *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2));
+    }
+
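+    /* Editorial note: the byte written to seqHead above is the
+     * Symbol_Compression_Modes field of the Sequences section: two bits per
+     * table (literal lengths in bits 7-6, offsets in bits 5-4, match lengths
+     * in bits 3-2, low two bits reserved), with set_repeat (3) in every field
+     * when prior tables are reused. */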
{ size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return (size_t)(op - ostart); +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. 
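+ * (Editorial note: the emitted sub-block is laid out exactly like a regular
+ * compressed block: a 3-byte block header, then the literals section, then
+ * the sequences section, as assembled by the two helpers called below.)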
*/ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, (size_t)(oend-op), + bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, (size_t)(oend-op), + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return (size_t)(op-ostart); +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U8* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. 
 */
+        assert(max <= defaultMax);
+        cSymbolTypeSizeEstimateInBits = max <= defaultMax
+                ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max)
+                : ERROR(GENERIC);
+    } else if (type == set_rle) {
+        cSymbolTypeSizeEstimateInBits = 0;
+    } else if (type == set_compressed || type == set_repeat) {
+        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+    }
+    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10;
+    while (ctp < ctEnd) {
+        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+        ctp++;
+    }
+    return cSymbolTypeSizeEstimateInBits / 8;
+}
+
+static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+    size_t cSeqSizeEstimate = 0;
+    if (nbSeq == 0) return sequencesSectionHeaderSize;
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
+                                         nbSeq, fseTables->offcodeCTable, NULL,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL,
+                                         nbSeq, fseTables->litlengthCTable, LL_bits,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML,
+                                         nbSeq, fseTables->matchlengthCTable, ML_bits,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                         workspace, wkspSize);
+    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+    return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+typedef struct {
+    size_t estLitSize;
+    size_t estBlockSize;
+} EstimatedBlockSize;
+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+                                                    const BYTE* ofCodeTable,
+                                                    const BYTE* llCodeTable,
+                                                    const BYTE* mlCodeTable,
+                                                    size_t nbSeq,
+                                                    const ZSTD_entropyCTables_t* entropy,
+                                                    const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                                    void* workspace, size_t wkspSize,
+                                                    int writeLitEntropy, int writeSeqEntropy)
+{
+    EstimatedBlockSize ebs;
+    ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
+                                                       &entropy->huf, &entropyMetadata->hufMetadata,
+                                                       workspace, wkspSize, writeLitEntropy);
+    ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                           nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                           workspace, wkspSize, writeSeqEntropy);
+    ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
+    return ebs;
+}
+
+static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
+{
+    if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle)
+        return 1;
+    if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle)
+        return 1;
+    if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle)
+        return 1;
+    return 0;
+}
+
+static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
+{
+    size_t n, total = 0;
+    assert(sp != NULL);
+    for (n=0; n<seqCount; n++) {
+        total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
+    }
+    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
+    return total;
+}
+
+#define BYTESCALE 256
+
+static size_t sizeBlockSequences(const
seqDef* sp, size_t nbSeqs, + size_t targetBudget, size_t avgLitCost, size_t avgSeqCost, + int firstSubBlock) +{ + size_t n, budget = 0, inSize=0; + /* entropy headers */ + size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */ + assert(firstSubBlock==0 || firstSubBlock==1); + budget += headerSize; + + /* first sequence => at least one sequence*/ + budget += sp[0].litLength * avgLitCost + avgSeqCost; + if (budget > targetBudget) return 1; + inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH); + + /* loop over sequences */ + for (n=1; n<nbSeqs; n++) { + size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost; + budget += currentCost; + inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH); + /* stop when sub-block budget is reached */ + if ( (budget > targetBudget) + /* though continue to expand until the sub-block is deemed compressible */ + && (budget < inSize * BYTESCALE) ) + break; + } + + return n; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written into the first block. + * The following blocks use repeat_mode to compress. + * Sub-blocks are all compressed, except the last one when beneficial. + * @return : compressed size of the super block (which features multiple ZSTD blocks) + * or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; /* tracks progress within seqStorePtr->sequences */ + size_t const nbSeqs = (size_t)(send - sstart); + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + size_t const nbLiterals = (size_t)(lend - lstart); + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ + size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); + int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); + int writeSeqEntropy = 1; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)", + (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart)); + + /* let's start by a general estimation for the full block */ + if (nbSeqs > 0) { + EstimatedBlockSize const ebs = + ZSTD_estimateSubBlockSize(lp, nbLiterals, + ofCodePtr, llCodePtr, mlCodePtr, nbSeqs, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, + writeLitEntropy, writeSeqEntropy); + /* quick estimation */ + size_t const avgLitCost = nbLiterals ?
(ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; + size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; + const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1); + size_t n, avgBlockBudget, blockBudgetSupp=0; + avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks; + DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", + (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, + (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); + /* simplification: if the estimate states that the full superblock doesn't compress, just bail out immediately + * this will result in the production of a single uncompressed block covering @srcSize.*/ + if (ebs.estBlockSize > srcSize) return 0; + + /* compress and write sub-blocks */ + assert(nbSubBlocks>0); + for (n=0; n < nbSubBlocks-1; n++) { + /* determine nb of sequences for current sub-block + nbLiterals from next sequence */ + size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp), + avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0); + /* if reached last sequence : break to last sub-block (simplification) */ + assert(seqCount <= (size_t)(send-sp)); + if (sp + seqCount == send) break; + assert(seqCount > 0); + /* compress sub-block */ + { int litEntropyWritten = 0; + int seqEntropyWritten = 0; + size_t litSize = countLiterals(seqStorePtr, sp, seqCount); + const size_t decompressedSize = + ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0); + size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, (size_t)(oend-op), + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + 0); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + + /* check compressibility, update state components */ + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes", + (unsigned)decompressedSize, (unsigned)cSize); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + sp += seqCount; + blockBudgetSupp = 0; + } } + /* otherwise : do not compress yet, coalesce current sub-block with following one */ + } + } /* if (nbSeqs > 0) */ + + /* write last block */ + DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp)); + { int litEntropyWritten = 0; + int seqEntropyWritten = 0; + size_t litSize = (size_t)(lend - lp); + size_t seqCount = (size_t)(send - sp); + const size_t decompressedSize = + ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1); + size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, (size_t)(oend-op), + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + + /* update pointers, the nb of literals borrowed from next sequence must be preserved */ + if (cSize > 0 && cSize
< decompressedSize) { + DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes", + (unsigned)decompressedSize, (unsigned)cSize); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + sp += seqCount; + } + } + + + if (writeLitEntropy) { + DEBUGLOG(5, "Literal entropy tables were never written"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block"); + return 0; + } + + if (ip < iend) { + /* some data left : last part of the block sent uncompressed */ + size_t const rSize = (size_t)((iend - ip)); + size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock); + DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + const seqDef* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u", + (unsigned)(op-ostart)); + return (size_t)(op-ostart); +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned lastBlock) +{ + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.h new file mode 100644 index 0000000..8e494f0 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_compress_superblock.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +#include "../zstd.h" /* ZSTD_CCtx */ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_cwksp.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_cwksp.h new file mode 100644 index 0000000..3eddbd3 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_cwksp.h @@ -0,0 +1,748 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ +#include "../common/zstd_internal.h" +#include "../common/portability_macros.h" + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ + +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. + */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif + + +/* Set our tables and aligneds to align by 64 bytes */ +#define ZSTD_CWKSP_ALIGNMENT_BYTES 64 + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_aligned_init_once, + ZSTD_cwksp_alloc_aligned, + ZSTD_cwksp_alloc_buffers +} ZSTD_cwksp_alloc_phase_e; + +/** + * Used to describe whether the workspace is statically allocated (and will not + * necessarily ever be freed), or if it's dynamically allocated and we can + * expect a well-formed caller to free this. + */ +typedef enum { + ZSTD_cwksp_dynamic_alloc, + ZSTD_cwksp_static_alloc +} ZSTD_cwksp_static_alloc_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. 
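+ * (A workspace obtained through ZSTD_cwksp_create() is malloc'd and released by + * ZSTD_cwksp_free(); one wrapped around a caller-provided buffer, as the static + * CCtx/CDict entry points do, performs no allocation at all.)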
+ * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated are less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. + * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ->] free space [<- buffers][<- aligned][<- init once] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. These tables are 64-byte aligned. + * + * - Init once: these buffers must be initialized at least once before + * use. They should be used when we want to skip memory initialization + * while not triggering memory checkers (like Valgrind) when reading from + * this memory without writing to it first. + * These buffers should be used carefully as they might contain data + * from previous compressions. + * Buffers are aligned to 64 bytes. + * + * - Aligned: these buffers don't require any initialization before they're + * used. The user of the buffer should make sure they write into a buffer + * location before reading from it. + * Buffers are aligned to 64 bytes.
+ * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Init once / Tables + * 3. Aligned / Tables + * 4. Buffers / Tables + * + * Attempts to reserve objects of different types out of order will fail. + */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + void* initOnceStart; + + BYTE allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); +MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); + assert(ws->initOnceStart <= ZSTD_cwksp_initialAllocStart(ws)); + assert(ws->workspace <= ws->initOnceStart); +#if ZSTD_MEMORY_SANITIZER + { + intptr_t const offset = __msan_test_shadow(ws->initOnceStart, + (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart); + (void)offset; +#if defined(ZSTD_MSAN_PRINT) + if(offset!=-1) { + __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32); + } +#endif + assert(offset==-1); + }; +#endif +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. (Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. + * + * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). + */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { + if (size == 0) + return 0; +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#else + return size; +#endif +} + +/** + * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. + * Used to determine the number of bytes required for a given "aligned". + */ +MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { + return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); +} + +/** + * Returns the amount of additional space the cwksp must allocate + * for internal purposes (currently only alignment). 
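+ * (With ZSTD_CWKSP_ALIGNMENT_BYTES == 64, this is a fixed 128 bytes.)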
+ */ +MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { + /* For alignment, the wksp will always allocate an additional 2*ZSTD_CWKSP_ALIGNMENT_BYTES + * bytes to align the beginning of tables section and end of buffers; + */ + size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES * 2; + return slackSpace; +} + + +/** + * Return the number of additional bytes required to align a pointer to the given number of bytes. + * alignBytes must be a power of two. + */ +MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { + size_t const alignBytesMask = alignBytes - 1; + size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; + assert((alignBytes & alignBytesMask) == 0); + assert(bytes < alignBytes); + return bytes; +} + +/** + * Returns the initial value for allocStart which is used to determine the position from + * which we can allocate from the end of the workspace. + */ +MEM_STATIC void* ZSTD_cwksp_initialAllocStart(ZSTD_cwksp* ws) { + return (void*)((size_t)ws->workspaceEnd & ~(ZSTD_CWKSP_ALIGNMENT_BYTES-1)); +} + +/** + * Internal function. Do not use directly. + * Reserves the given number of bytes within the aligned/buffer segment of the wksp, + * which counts from the end of the wksp (as opposed to the object/table segment). + * + * Returns a pointer to the beginning of that space. + */ +MEM_STATIC void* +ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) +{ + void* const alloc = (BYTE*)ws->allocStart - bytes; + void* const bottom = ws->tableEnd; + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + /* the area is reserved from the end of wksp. + * If it overlaps with tableValidEnd, it voids guarantees on values' range */ + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + return alloc; +} + +/** + * Moves the cwksp to the next phase, and does any necessary allocations. + * cwksp initialization must necessarily go through each phase in order. + * Returns a 0 on success, or zstd error + */ +MEM_STATIC size_t +ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) +{ + assert(phase >= ws->phase); + if (phase > ws->phase) { + /* Going from allocating objects to allocating initOnce / tables */ + if (ws->phase < ZSTD_cwksp_alloc_aligned_init_once && + phase >= ZSTD_cwksp_alloc_aligned_init_once) { + ws->tableValidEnd = ws->objectEnd; + ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); + + { /* Align the start of the tables to 64 bytes. 
Use [0, 63] bytes */ + void *const alloc = ws->objectEnd; + size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); + void *const objectEnd = (BYTE *) alloc + bytesToAlign; + DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); + RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation, + "table phase - alignment initial allocation failed!"); + ws->objectEnd = objectEnd; + ws->tableEnd = objectEnd; /* table area starts being empty */ + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + } + } + ws->phase = phase; + ZSTD_cwksp_assert_internal_consistency(ws); + } + return 0; +} + +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) +{ + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr < ws->workspaceEnd); +} + +/** + * Internal function. Do not use directly. + */ +MEM_STATIC void* +ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) +{ + void* alloc; + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) { + return NULL; + } + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either side. */ + if (alloc) { + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + /* We need to keep the redzone poisoned while unpoisoning the bytes that + * are actually allocated. */ + __asan_unpoison_memory_region(alloc, bytes - 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE); + } + } +#endif + + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) +{ + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). + * This memory has been initialized at least once in the past. + * This doesn't mean it has been initialized this time, and it might contain data from previous + * operations. + * The main usage is for algorithms that might need read access into uninitialized memory. + * The algorithm must maintain safety under these conditions and must make sure it doesn't + * leak any of the past data (directly or in side channels). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned_init_once(ZSTD_cwksp* ws, size_t bytes) +{ + size_t const alignedBytes = ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES); + void* ptr = ZSTD_cwksp_reserve_internal(ws, alignedBytes, ZSTD_cwksp_alloc_aligned_init_once); + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + if(ptr && ptr < ws->initOnceStart) { + /* We assume the memory following the current allocation is either: + * 1. Not usable as initOnce memory (end of workspace) + * 2. Another initOnce buffer that has been allocated before (and so was previously memset) + * 3. An ASAN redzone, in which case we don't want to write on it + * For these reasons it should be fine to not explicitly zero every byte up to ws->initOnceStart. + * Note that we assume here that MSAN and ASAN cannot run at the same time.
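+ * (Consequently only the never-before-used tail [ptr, ws->initOnceStart) is zeroed + * below; anything past initOnceStart was zeroed by an earlier init-once reservation.)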
*/ + ZSTD_memset(ptr, 0, MIN((size_t)((U8*)ws->initOnceStart - (U8*)ptr), alignedBytes)); + ws->initOnceStart = ptr; + } +#if ZSTD_MEMORY_SANITIZER + assert(__msan_test_shadow(ptr, bytes) == -1); +#endif + return ptr; +} + +/** + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) +{ + void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), + ZSTD_cwksp_alloc_aligned); + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return ptr; +} + +/** + * Aligned on 64 bytes. These buffers have the special property that + * their values remain constrained, allowing us to reuse them without + * memset()-ing them. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) +{ + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned_init_once; + void* alloc; + void* end; + void* top; + + /* We can only start allocating tables after we are done reserving space for objects at the + * start of the workspace */ + if(ws->phase < phase) { + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { + return NULL; + } + } + alloc = ws->tableEnd; + end = (BYTE *)alloc + bytes; + top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); + assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return alloc; +} + +/** + * Aligned on sizeof(void*). + * Note : should happen only once, at workspace first initialization + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) +{ + size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(4, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0); + assert(bytes % ZSTD_ALIGNOF(void*) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(3, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either side.
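+ * (The redzones themselves stay poisoned; only the bytes handed back to the + * caller are unpoisoned below.)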
*/ + alloc = (BYTE*)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) +{ + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table reuse logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * Since tableValidEnd space and initOnce space may overlap, we don't poison + * the initOnce portion, as that would break its promise. This means that this poisoning + * check isn't always applied fully. */ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) { + __msan_poison(ws->objectEnd, size); + } else { + assert(ws->initOnceStart >= ws->objectEnd); + __msan_poison(ws->objectEnd, (BYTE*)ws->initOnceStart - (BYTE*)ws->objectEnd); + } + } +#endif + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ZSTD_memset(ws->tableValidEnd, 0, (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd)); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context reuse logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace except for the areas in which we expect memory reuse + * without initialization (objects, valid tables area and init once + * memory).
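+ * (Only the free-space gap [tableValidEnd, initOnceStart) is re-poisoned below; + * the preserved areas keep their current shadow state.)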
*/ + { + if((BYTE*)ws->tableValidEnd < (BYTE*)ws->initOnceStart) { + size_t size = (BYTE*)ws->initOnceStart - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } + } +#endif + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ZSTD_cwksp_initialAllocStart(ws); + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_aligned_init_once) { + ws->phase = ZSTD_cwksp_alloc_aligned_init_once; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). + */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->initOnceStart = ZSTD_cwksp_initialAllocStart(ws); + ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_customMalloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); +#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE) + if (ptr != NULL && customMem.customFree != NULL) { + __msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws)); + } +#endif + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before it's used again). 
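+ * A typical pattern (sketch with a hypothetical MyCtx type, mirroring how a CCtx is + * carved out of its own workspace): + * ZSTD_cwksp ws; + * ZSTD_cwksp_init(&ws, mem, size, ZSTD_cwksp_static_alloc); + * MyCtx* const ctx = (MyCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(MyCtx)); + * ZSTD_cwksp_move(&ctx->workspace, &ws); + * after which only ctx->workspace is valid, and ws must be re-init'ed before reuse.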
*/ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +/* ZSTD_cwksp_estimated_space_within_bounds() : + * Returns whether the estimated space needed for a wksp is within an acceptable limit of the + * actual amount of space used. + */ +MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp *const ws, size_t const estimatedSpace) { + /* We have an alignment space between objects and tables, and between tables and buffers, so we can have up to twice + * the alignment bytes difference between estimation and actual usage */ + return (estimatedSpace - ZSTD_cwksp_slack_space_required()) <= ZSTD_cwksp_used(ws) && + ZSTD_cwksp_used(ws) <= estimatedSpace; +} + + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CWKSP_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.c new file mode 100644 index 0000000..a4e9c50 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.c @@ -0,0 +1,770 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_double_fast.h" + +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables.
+ * Insert the other positions into the large hash table if their entry + * is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) { + ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i); + } + if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { + ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i); + } + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } } +} + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = curr + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = curr + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } } +} + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm, + ZSTD_tableFillPurpose_e tfp) +{ + if (tfp == ZSTD_tfp_forCDict) { + ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm); + } else { + ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm); + } +} + + +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_compressBlock_doubleFast_noDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved1 = 0, offsetSaved2 = 0; + + size_t mLength; + U32 offset; + U32 curr; + + /* how many positions to search before increasing step size */ + const size_t kStepIncr = 1 << kSearchStrength; + /* the position at which to increment the step 
size if no match is found */ + const BYTE* nextStep; + size_t step; /* the current step size */ + + size_t hl0; /* the long hash at ip */ + size_t hl1; /* the long hash at ip1 */ + + U32 idxl0; /* the long match index for ip */ + U32 idxl1; /* the long match index for ip1 */ + + const BYTE* matchl0; /* the long match for ip */ + const BYTE* matchs0; /* the short match for ip */ + const BYTE* matchl1; /* the long match for ip1 */ + + const BYTE* ip = istart; /* the current position */ + const BYTE* ip1; /* the next position */ + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic"); + + /* init */ + ip += ((ip - prefixLowest) == 0); + { + U32 const current = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog); + U32 const maxRep = current - windowLow; + if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0; + } + + /* Outer Loop: one iteration per match found and stored */ + while (1) { + step = 1; + nextStep = ip + kStepIncr; + ip1 = ip + step; + + if (ip1 > ilimit) { + goto _cleanup; + } + + hl0 = ZSTD_hashPtr(ip, hBitsL, 8); + idxl0 = hashLong[hl0]; + matchl0 = base + idxl0; + + /* Inner Loop: one iteration per search / position */ + do { + const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 idxs0 = hashSmall[hs0]; + curr = (U32)(ip-base); + matchs0 = base + idxs0; + + hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */ + + /* check noDict repcode */ + if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); + goto _match_stored; + } + + hl1 = ZSTD_hashPtr(ip1, hBitsL, 8); + + if (idxl0 > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchl0) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8; + offset = (U32)(ip-matchl0); + while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */ + goto _match_found; + } + } + + idxl1 = hashLong[hl1]; + matchl1 = base + idxl1; + + if (idxs0 > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(matchs0) == MEM_read32(ip)) { + goto _search_next_long; + } + } + + if (ip1 >= nextStep) { + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + step++; + nextStep += kStepIncr; + } + ip = ip1; + ip1 += step; + + hl0 = hl1; + idxl0 = idxl1; + matchl0 = matchl1; + #if defined(__aarch64__) + PREFETCH_L1(ip+256); + #endif + } while (ip1 <= ilimit); + +_cleanup: + /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), + * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ + offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved1; + rep[1] = offset_2 ? 
offset_2 : offsetSaved2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); + +_search_next_long: + + /* check prefix long +1 match */ + if (idxl1 > prefixLowestIndex) { + if (MEM_read64(matchl1) == MEM_read64(ip1)) { + ip = ip1; + mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8; + offset = (U32)(ip-matchl1); + while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */ + goto _match_found; + } + } + + /* if no long +1 match, explore the short match we found */ + mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4; + offset = (U32)(ip - matchs0); + while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */ + + /* fall-through */ + +_match_found: /* requires ip, offset, mLength */ + offset_2 = offset_1; + offset_1 = offset; + + if (step < 4) { + /* It is unsafe to write this value back to the hashtable when ip1 is + * greater than or equal to the new ip we will have after we're done + * processing this match. Rather than perform that test directly + * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler + * more predictable test. The minmatch even if we take a short match is + * 4 bytes, so as long as step, the distance between ip and ip1 + * (initially) is less than 4, we know ip1 < new ip. */ + hashLong[hl1] = (U32)(ip1 - base); + } + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) 
*/ + } + } + } +} + + +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams; + const U32* const dictHashLong = dms->hashTable; + const U32* const dictHashSmall = dms->chainTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase); + const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; + const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic"); + + /* if a dictionary is attached, it must be within window range */ + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + + if (ms->prefetchCDictTables) { + size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); + size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32); + PREFETCH_AREA(dictHashLong, hashTableBytes); + PREFETCH_AREA(dictHashSmall, chainTableBytes); + } + + /* init */ + ip += (dictAndPrefixLength == 0); + + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. 
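+ * (Hence the asserts just below: both repcodes must reference positions inside + * the dictionary + prefix range, i.e. offsets <= dictAndPrefixLength.)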
*/ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS]; + U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS]; + int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL); + int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS); + U32 const curr = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + + /* check repcode */ + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictTagsMatchL) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } else if (dictTagsMatchS) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } } + + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; + +_search_next_long: + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + U32 const dictMatchIndexAndTagL3 = 
dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS]; + int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3); + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = curr + 1; + + /* check prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictTagsMatchL3) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } } } + + /* if no long +1 match, explore the short match we found */ + if (matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(curr - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +#define ZSTD_GEN_DFAST_FN(dictMode, mls) \ + static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \ + } + +ZSTD_GEN_DFAST_FN(noDict, 4) +ZSTD_GEN_DFAST_FN(noDict, 5) +ZSTD_GEN_DFAST_FN(noDict, 6) +ZSTD_GEN_DFAST_FN(noDict, 7) + +ZSTD_GEN_DFAST_FN(dictMatchState, 4) +ZSTD_GEN_DFAST_FN(dictMatchState, 5) +ZSTD_GEN_DFAST_FN(dictMatchState, 6) +ZSTD_GEN_DFAST_FN(dictMatchState, 7) + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize); + } +} + + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? 
dictLimit : lowLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = curr - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = curr + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? 
dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = curr+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = curr - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* move to next sequence start */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (offset_2 <= current2 - dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +ZSTD_GEN_DFAST_FN(extDict, 4) +ZSTD_GEN_DFAST_FN(extDict, 5) +ZSTD_GEN_DFAST_FN(extDict, 6) +ZSTD_GEN_DFAST_FN(extDict, 7) + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize); + } +} + +#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.h new file mode 100644 index 0000000..ce6ed8c --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_double_fast.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "../common/mem.h" /* U32 */ +#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ + +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm, + ZSTD_tableFillPurpose_e tfp); + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict +#else +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL +#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.c new file mode 100644 index 0000000..6c4554c --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.c @@ -0,0 +1,968 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ +#include "zstd_fast.h" + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Currently, we always use ZSTD_dtlm_full for filling CDict tables. + * Feel free to remove this assert if there's a good reason! */ + assert(dtlm == ZSTD_dtlm_full); + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. 
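+ *
+ * For illustration, with fastHashFillStep == 3 and dtlm == ZSTD_dtlm_full
+ * (as asserted above), the loop behaves like the following simplified
+ * sketch, where write() and slot() are hypothetical shorthands for the
+ * tagged-index store and table read used below:
+ *
+ *   for (pos = start; pos + 3 < end + 2; pos += 3) {
+ *       write(pos);                            (always inserted)
+ *       if (slot(pos+1) == 0) write(pos+1);    (only if still empty)
+ *       if (slot(pos+2) == 0) write(pos+2);    (only if still empty)
+ *   }
+ *
+ * so the extra positions never evict an entry that is already filled.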
+ */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + { size_t const hashAndTag = ZSTD_hashPtr(ip, hBits, mls); + ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr); } + + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hashAndTag = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) { /* not yet filled */ + ZSTD_writeTaggedIndex(hashTable, hashAndTag, curr + p); + } } } } +} + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Currently, we always use ZSTD_dtlm_fast for filling CCtx tables. + * Feel free to remove this assert if there's a good reason! */ + assert(dtlm == ZSTD_dtlm_fast); + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = curr; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = curr + p; + } } } } +} + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm, + ZSTD_tableFillPurpose_e tfp) +{ + if (tfp == ZSTD_tfp_forCDict) { + ZSTD_fillHashTableForCDict(ms, end, dtlm); + } else { + ZSTD_fillHashTableForCCtx(ms, end, dtlm); + } +} + + +/** + * If you squint hard enough (and ignore repcodes), the search operation at any + * given position is broken into 4 stages: + * + * 1. Hash (map position to hash value via input read) + * 2. Lookup (map hash val to index via hashtable read) + * 3. Load (map index to value at that position via input read) + * 4. Compare + * + * Each of these steps involves a memory read at an address which is computed + * from the previous step. This means these steps must be sequenced and their + * latencies are cumulative. + * + * Rather than do 1->2->3->4 sequentially for a single position before moving + * onto the next, this implementation interleaves these operations across the + * next few positions: + * + * R = Repcode Read & Compare + * H = Hash + * T = Table Lookup + * M = Match Read & Compare + * + * Pos | Time --> + * ----+------------------- + * N | ... M + * N+1 | ... TM + * N+2 | R H T M + * N+3 | H TM + * N+4 | R H T M + * N+5 | H ... + * N+6 | R ... + * + * This is very much analogous to the pipelining of execution in a CPU. And just + * like a CPU, we have to dump the pipeline when we find a match (i.e., take a + * branch). 
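+ *
+ * In steady state, one turn of the loop below does roughly the following,
+ * a simplified sketch of the real code (which also writes back hash
+ * entries and checks repcodes):
+ *
+ *   idx   = hashTable[hash1];              (T: lookup for the next position)
+ *   hash0 = hash1;
+ *   hash1 = ZSTD_hashPtr(ip2, hlog, mls);  (H: hash two positions ahead)
+ *   ip0 = ip1; ip1 = ip2; ip2 = ip3;       (advance the pipeline)
+ *   mval = idx >= prefixStartIndex ? MEM_read32(base + idx)
+ *                                  : MEM_read32(ip0) ^ 1;  (guaranteed miss)
+ *   if (MEM_read32(ip0) == mval) goto _offset;  (M: compare, flush on match)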
+ * + * When this happens, we throw away our current state, and do the following prep + * to re-enter the loop: + * + * Pos | Time --> + * ----+------------------- + * N | H T + * N+1 | H + * + * This is also the work we do at the beginning to enter the loop initially. + */ +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_compressBlock_fast_noDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls, U32 const hasStep) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + + const BYTE* anchor = istart; + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* ip2; + const BYTE* ip3; + U32 current0; + + U32 rep_offset1 = rep[0]; + U32 rep_offset2 = rep[1]; + U32 offsetSaved1 = 0, offsetSaved2 = 0; + + size_t hash0; /* hash for ip0 */ + size_t hash1; /* hash for ip1 */ + U32 idx; /* match idx for ip0 */ + U32 mval; /* src value at match idx */ + + U32 offcode; + const BYTE* match0; + size_t mLength; + + /* ip0 and ip1 are always adjacent. The targetLength skipping and + * uncompressibility acceleration is applied to every other position, + * matching the behavior of #1562. step therefore represents the gap + * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */ + size_t step; + const BYTE* nextStep; + const size_t kStepIncr = (1 << (kSearchStrength - 1)); + + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (rep_offset2 > maxRep) offsetSaved2 = rep_offset2, rep_offset2 = 0; + if (rep_offset1 > maxRep) offsetSaved1 = rep_offset1, rep_offset1 = 0; + } + + /* start each op */ +_start: /* Requires: ip0 */ + + step = stepSize; + nextStep = ip0 + kStepIncr; + + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ + ip1 = ip0 + 1; + ip2 = ip0 + step; + ip3 = ip2 + 1; + + if (ip3 >= ilimit) { + goto _cleanup; + } + + hash0 = ZSTD_hashPtr(ip0, hlog, mls); + hash1 = ZSTD_hashPtr(ip1, hlog, mls); + + idx = hashTable[hash0]; + + do { + /* load repcode match for ip[2]*/ + const U32 rval = MEM_read32(ip2 - rep_offset1); + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* check repcode at ip[2] */ + if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) { + ip0 = ip2; + match0 = ip0 - rep_offset1; + mLength = ip0[-1] == match0[-1]; + ip0 -= mLength; + match0 -= mLength; + offcode = REPCODE1_TO_OFFBASE; + mLength += 4; + + /* First write next hash table entry; we've already calculated it. + * This write is known to be safe because the ip1 is before the + * repcode (ip2). 
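+ *
+ * Illustratively: ip1 == ip0 + 1, while the repcode match starts at
+ * ip2 >= ip0 + step with step >= 2, and searching resumes at or beyond
+ * ip2 once the match is stored; informally, assert(ip1 < ip2) would
+ * hold here, so the entry recorded for ip1 stays strictly in the past.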
*/ + hashTable[hash1] = (U32)(ip1 - base); + + goto _match; + } + + /* load match for ip[0] */ + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + + /* First write next hash table entry; we've already calculated it. + * This write is known to be safe because the ip1 == ip0 + 1, so + * we know we will resume searching after ip1 */ + hashTable[hash1] = (U32)(ip1 - base); + + goto _offset; + } + + /* lookup ip[1] */ + idx = hashTable[hash1]; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip3; + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* load match for ip[0] */ + if (idx >= prefixStartIndex) { + mval = MEM_read32(base + idx); + } else { + mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */ + } + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + + /* first write next hash table entry; we've already calculated it */ + if (step <= 4) { + /* We need to avoid writing an index into the hash table >= the + * position at which we will pick up our searching after we've + * taken this match. + * + * The minimum possible match has length 4, so the earliest ip0 + * can be after we take this match will be the current ip0 + 4. + * ip1 is ip0 + step - 1. If ip1 is >= ip0 + 4, we can't safely + * write this position. + */ + hashTable[hash1] = (U32)(ip1 - base); + } + + goto _offset; + } + + /* lookup ip[1] */ + idx = hashTable[hash1]; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip0 + step; + ip3 = ip1 + step; + + /* calculate step */ + if (ip2 >= nextStep) { + step++; + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + nextStep += kStepIncr; + } + } while (ip3 < ilimit); + +_cleanup: + /* Note that there are probably still a couple positions we could search. + * However, it seems to be a meaningful performance hit to try to search + * them. So let's not. */ + + /* When the repcodes are outside of the prefix, we set them to zero before the loop. + * When the offsets are still zero, we need to restore them after the block to have a correct + * repcode history. If only one offset was invalid, it is easy. The tricky case is when both + * offsets were invalid. We need to figure out which offset to refill with. + * - If both offsets are zero they are in the same order. + * - If both offsets are non-zero, we won't restore the offsets from `offsetSaved[12]`. + * - If only one is zero, we need to decide which offset to restore. + * - If rep_offset1 is non-zero, then rep_offset2 must be offsetSaved1. + * - It is impossible for rep_offset2 to be non-zero. + * + * So if rep_offset1 started invalid (offsetSaved1 != 0) and became valid (rep_offset1 != 0), then + * set rep[0] = rep_offset1 and rep[1] = offsetSaved1. + */ + offsetSaved2 = ((offsetSaved1 != 0) && (rep_offset1 != 0)) ? offsetSaved1 : offsetSaved2; + + /* save reps for next block */ + rep[0] = rep_offset1 ? rep_offset1 : offsetSaved1; + rep[1] = rep_offset2 ? rep_offset2 : offsetSaved2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); + +_offset: /* Requires: ip0, idx */ + + /* Compute the offset code. 
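+ *
+ * (Convention sketch: offBase values 1..ZSTD_REP_NUM denote repcodes,
+ * hence REPCODE1_TO_OFFBASE == 1, while a literal offset o is encoded
+ * as OFFSET_TO_OFFBASE(o) == o + ZSTD_REP_NUM; see
+ * zstd_compress_internal.h for the authoritative definitions.)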
*/ + match0 = base + idx; + rep_offset2 = rep_offset1; + rep_offset1 = (U32)(ip0-match0); + offcode = OFFSET_TO_OFFBASE(rep_offset1); + mLength = 4; + + /* Count the backwards match length. */ + while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) { + ip0--; + match0--; + mLength++; + } + +_match: /* Requires: ip0, match0, offcode */ + + /* Count the forward length. */ + mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend); + + ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); + + ip0 += mLength; + anchor = ip0; + + /* Fill table and check for immediate repcode. */ + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4; + { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, REPCODE1_TO_OFFBASE, rLength); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + + goto _start; +} + +#define ZSTD_GEN_FAST_FN(dictMode, mls, step) \ + static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step( \ + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \ + void const* src, size_t srcSize) \ + { \ + return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \ + } + +ZSTD_GEN_FAST_FN(noDict, 4, 1) +ZSTD_GEN_FAST_FN(noDict, 5, 1) +ZSTD_GEN_FAST_FN(noDict, 6, 1) +ZSTD_GEN_FAST_FN(noDict, 7, 1) + +ZSTD_GEN_FAST_FN(noDict, 4, 0) +ZSTD_GEN_FAST_FN(noDict, 5, 0) +ZSTD_GEN_FAST_FN(noDict, 6, 0) +ZSTD_GEN_FAST_FN(noDict, 7, 0) + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + if (ms->cParams.targetLength > 1) { + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize); + } + } else { + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize); + } + + } +} + +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) +{ + const 
ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip0 = istart; + const BYTE* ip1 = ip0 + stepSize; /* we assert below that stepSize >= 1 */ + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(istart - prefixStart + dictEnd - dictStart); + const U32 dictHBits = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS; + + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + (void)hasStep; /* not currently specialized on whether it's accelerated */ + + /* ensure there will be no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + if (ms->prefetchCDictTables) { + size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32); + PREFETCH_AREA(dictHashTable, hashTableBytes); + } + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip0 += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Outer search loop */ + assert(stepSize >= 1); + while (ip1 <= ilimit) { /* repcode check at (ip0 + 1) is safe because ip0 < ip1 */ + size_t mLength; + size_t hash0 = ZSTD_hashPtr(ip0, hlog, mls); + + size_t const dictHashAndTag0 = ZSTD_hashPtr(ip0, dictHBits, mls); + U32 dictMatchIndexAndTag = dictHashTable[dictHashAndTag0 >> ZSTD_SHORT_CACHE_TAG_BITS]; + int dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag0); + + U32 matchIndex = hashTable[hash0]; + U32 curr = (U32)(ip0 - base); + size_t step = stepSize; + const size_t kStepIncr = 1 << kSearchStrength; + const BYTE* nextStep = ip0 + kStepIncr; + + /* Inner search loop */ + while (1) { + const BYTE* match = base + matchIndex; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + const size_t hash1 = ZSTD_hashPtr(ip1, hlog, mls); + size_t const dictHashAndTag1 = ZSTD_hashPtr(ip1, dictHBits, mls); + hashTable[hash0] = curr; /* update hash table */ + + if (((U32) ((prefixStartIndex - 1) - repIndex) >= + 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip0 + 1))) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip0 + 1 + 4, repMatch + 4, iend, repMatchEnd, prefixStart) + 4; + ip0++; + ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength); + break; + } + + if (dictTagsMatch) { + /* Found a possible dict match */ + const U32 dictMatchIndex = dictMatchIndexAndTag >> ZSTD_SHORT_CACHE_TAG_BITS; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex > dictStartIndex && + MEM_read32(dictMatch) == MEM_read32(ip0)) { + /* To replicate extDict parse behavior, we only use dict matches when the normal matchIndex is invalid */ + if (matchIndex <= prefixStartIndex) { + U32 const offset = (U32) (curr - dictMatchIndex - dictIndexDelta); + mLength = ZSTD_count_2segments(ip0 + 4, dictMatch + 4, iend, dictEnd, prefixStart) + 4; + while (((ip0 > anchor) & (dictMatch > dictStart)) + && (ip0[-1] == dictMatch[-1])) { + ip0--; + dictMatch--; + mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); + break; + } + } + } + + if (matchIndex > prefixStartIndex && MEM_read32(match) == MEM_read32(ip0)) { + /* found a regular match */ + U32 const offset = (U32) (ip0 - match); + mLength = ZSTD_count(ip0 + 4, match + 4, iend) + 4; + while (((ip0 > anchor) & (match > prefixStart)) + && (ip0[-1] == match[-1])) { + ip0--; + match--; + mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t) (ip0 - anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength); + break; + } + + /* Prepare for next iteration */ + dictMatchIndexAndTag = dictHashTable[dictHashAndTag1 >> ZSTD_SHORT_CACHE_TAG_BITS]; + dictTagsMatch = ZSTD_comparePackedTags(dictMatchIndexAndTag, dictHashAndTag1); + matchIndex = hashTable[hash1]; + + if (ip1 >= nextStep) { + step++; + nextStep += kStepIncr; + } + ip0 = ip1; + ip1 = ip1 + step; + if (ip1 > ilimit) goto _cleanup; + + curr = (U32)(ip0 - base); + hash0 = hash1; + } /* end inner search loop */ + + /* match found */ + assert(mLength); + ip0 += mLength; + anchor = ip0; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + /* check immediate repcode */ + while (ip0 <= ilimit) { + U32 const current2 = (U32)(ip0-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip0))) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = current2; + ip0 += repLength2; + anchor = ip0; + continue; + } + break; + } + } + + /* Prepare for next iteration */ + assert(ip0 == anchor); + ip1 = ip0 + stepSize; + } + +_cleanup: + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +ZSTD_GEN_FAST_FN(dictMatchState, 4, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 5, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 6, 0) +ZSTD_GEN_FAST_FN(dictMatchState, 7, 0) + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize); + } +} + + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls, U32 const hasStep) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? 
lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved1 = 0, offsetSaved2 = 0; + + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* ip2; + const BYTE* ip3; + U32 current0; + + + size_t hash0; /* hash for ip0 */ + size_t hash1; /* hash for ip1 */ + U32 idx; /* match idx for ip0 */ + const BYTE* idxBase; /* base pointer for idx */ + + U32 offcode; + const BYTE* match0; + size_t mLength; + const BYTE* matchEnd = 0; /* initialize to avoid warning, assert != 0 later */ + + size_t step; + const BYTE* nextStep; + const size_t kStepIncr = (1 << (kSearchStrength - 1)); + + (void)hasStep; /* not currently specialized on whether it's accelerated */ + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize); + + { U32 const curr = (U32)(ip0 - base); + U32 const maxRep = curr - dictStartIndex; + if (offset_2 >= maxRep) offsetSaved2 = offset_2, offset_2 = 0; + if (offset_1 >= maxRep) offsetSaved1 = offset_1, offset_1 = 0; + } + + /* start each op */ +_start: /* Requires: ip0 */ + + step = stepSize; + nextStep = ip0 + kStepIncr; + + /* calculate positions, ip0 - anchor == 0, so we skip step calc */ + ip1 = ip0 + 1; + ip2 = ip0 + step; + ip3 = ip2 + 1; + + if (ip3 >= ilimit) { + goto _cleanup; + } + + hash0 = ZSTD_hashPtr(ip0, hlog, mls); + hash1 = ZSTD_hashPtr(ip1, hlog, mls); + + idx = hashTable[hash0]; + idxBase = idx < prefixStartIndex ? dictBase : base; + + do { + { /* load repcode match for ip[2] */ + U32 const current2 = (U32)(ip2 - base); + U32 const repIndex = current2 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + U32 rval; + if ( ((U32)(prefixStartIndex - repIndex) >= 4) /* intentional underflow */ + & (offset_1 > 0) ) { + rval = MEM_read32(repBase + repIndex); + } else { + rval = MEM_read32(ip2) ^ 1; /* guaranteed to not match. */ + } + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + /* check repcode at ip[2] */ + if (MEM_read32(ip2) == rval) { + ip0 = ip2; + match0 = repBase + repIndex; + matchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + assert((match0 != prefixStart) & (match0 != dictStart)); + mLength = ip0[-1] == match0[-1]; + ip0 -= mLength; + match0 -= mLength; + offcode = REPCODE1_TO_OFFBASE; + mLength += 4; + goto _match; + } } + + { /* load match for ip[0] */ + U32 const mval = idx >= dictStartIndex ? + MEM_read32(idxBase + idx) : + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + goto _offset; + } } + + /* lookup ip[1] */ + idx = hashTable[hash1]; + idxBase = idx < prefixStartIndex ? dictBase : base; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip3; + + /* write back hash table entry */ + current0 = (U32)(ip0 - base); + hashTable[hash0] = current0; + + { /* load match for ip[0] */ + U32 const mval = idx >= dictStartIndex ? 
+ MEM_read32(idxBase + idx) : + MEM_read32(ip0) ^ 1; /* guaranteed not to match */ + + /* check match at ip[0] */ + if (MEM_read32(ip0) == mval) { + /* found a match! */ + goto _offset; + } } + + /* lookup ip[1] */ + idx = hashTable[hash1]; + idxBase = idx < prefixStartIndex ? dictBase : base; + + /* hash ip[2] */ + hash0 = hash1; + hash1 = ZSTD_hashPtr(ip2, hlog, mls); + + /* advance to next positions */ + ip0 = ip1; + ip1 = ip2; + ip2 = ip0 + step; + ip3 = ip1 + step; + + /* calculate step */ + if (ip2 >= nextStep) { + step++; + PREFETCH_L1(ip1 + 64); + PREFETCH_L1(ip1 + 128); + nextStep += kStepIncr; + } + } while (ip3 < ilimit); + +_cleanup: + /* Note that there are probably still a couple positions we could search. + * However, it seems to be a meaningful performance hit to try to search + * them. So let's not. */ + + /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), + * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ + offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved1; + rep[1] = offset_2 ? offset_2 : offsetSaved2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); + +_offset: /* Requires: ip0, idx, idxBase */ + + /* Compute the offset code. */ + { U32 const offset = current0 - idx; + const BYTE* const lowMatchPtr = idx < prefixStartIndex ? dictStart : prefixStart; + matchEnd = idx < prefixStartIndex ? dictEnd : iend; + match0 = idxBase + idx; + offset_2 = offset_1; + offset_1 = offset; + offcode = OFFSET_TO_OFFBASE(offset); + mLength = 4; + + /* Count the backwards match length. */ + while (((ip0>anchor) & (match0>lowMatchPtr)) && (ip0[-1] == match0[-1])) { + ip0--; + match0--; + mLength++; + } } + +_match: /* Requires: ip0, match0, offcode, matchEnd */ + + /* Count the forward length. */ + assert(matchEnd != 0); + mLength += ZSTD_count_2segments(ip0 + mLength, match0 + mLength, iend, matchEnd, prefixStart); + + ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength); + + ip0 += mLength; + anchor = ip0; + + /* write next hash table entry */ + if (ip1 < ip0) { + hashTable[hash1] = (U32)(ip1 - base); + } + + /* Fill table and check for immediate repcode. */ + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + while (ip0 <= ilimit) { + U32 const repIndex2 = (U32)(ip0-base) - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 > 0)) /* intentional underflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip0)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip0+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, REPCODE1_TO_OFFBASE, repLength2); + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += repLength2; + anchor = ip0; + continue; + } + break; + } } + + goto _start; +} + +ZSTD_GEN_FAST_FN(extDict, 4, 0) +ZSTD_GEN_FAST_FN(extDict, 5, 0) +ZSTD_GEN_FAST_FN(extDict, 6, 0) +ZSTD_GEN_FAST_FN(extDict, 7, 0) + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize); + case 5 : + return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize); + case 6 : + return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize); + case 7 : + return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize); + } +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.h new file mode 100644 index 0000000..9e4236b --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_fast.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "../common/mem.h" /* U32 */ +#include "zstd_compress_internal.h" + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm, + ZSTD_tableFillPurpose_e tfp); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.c new file mode 100644 index 0000000..67dd55f --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.c @@ -0,0 +1,2199 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "zstd_compress_internal.h" +#include "zstd_lazy.h" +#include "../common/bits.h" /* ZSTD_countTrailingZeros64 */ + +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) + +#define kLazySkippingStep 8 + + +/*-************************************* +* Binary Tree search +***************************************/ + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : curr >= btlow == (curr - btmask) + * doesn't fail */ +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms, + U32 curr, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (curr - windowValid > maxDistance) ? 
curr - maxDistance : windowValid; + + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + curr, dictLimit, windowLow); + assert(curr >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + for (; nbCompares && (matchIndex > windowLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (curr < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? + base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (curr < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + curr, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_DUBT_findBetterDictMatch ( + const ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + 
ms->window.dictLimit; + U32 const curr = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + for (; nbCompares && (dictMatchIndex > dictLowLimit); --nbCompares) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, OFFSET_TO_OFFBASE(curr - matchIndex), dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offsetPtr); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offBasePtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + + U32* const bt = ms->chainTable; + U32 const btLog 
= cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); + assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = curr; /* Update Hash Table */ + + for (; nbCompares && (matchIndex > windowLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)*offBasePtr)) ) + bestLength = matchLength, *offBasePtr = OFFSET_TO_OFFBASE(curr - matchIndex); + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + 
nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the dictionary. */ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offBasePtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - (U32)OFFBASE_TO_OFFSET(*offBasePtr); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offBasePtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offBasePtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offBasePtr, mls, dictMode); +} + +/*********************************** +* Dedicated dict search +***********************************/ + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target - idx ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. 
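+ *
+ * Sketch of the resulting bucket layout (this mirrors the packing code
+ * below, which stores a packed chain pointer in the last slot of each
+ * bucket):
+ *   hashTable[bucketIdx + 0 .. bucketSize-2] : most recent positions (cache)
+ *   hashTable[bucketIdx + bucketSize-1]      : (chainStart << 8) | chainLength
+ * so a reader recovers the spilled chain with:
+ *   chainIndex  = packed >> 8;    chainLength = packed & 0xFF;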
+ */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog > ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || ++countBeyondMinChain > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. */ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... */ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + +/* Returns the longest match length found in the dedicated dict search structure. + * If none are longer than the argument ml, then ml will be returned. 
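+ *
+ * A typical invocation, as used by the hash-chain and row matchfinders in
+ * this file (illustrative only):
+ *   ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
+ *                                             ip, iLimit, prefixStart, curr,
+ *                                             dictLimit, ddsIdx);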
+ */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, + const ZSTD_matchState_t* const dms, + const BYTE* const ip, const BYTE* const iLimit, + const BYTE* const prefixStart, const U32 curr, + const U32 dictLimit, const size_t ddsIdx) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + U32 matchIndex; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + ddsIndexDelta)); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + return ml; +} + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls, U32 const lazySkipping) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + /* Stop inserting every position when in the lazy skipping mode. */ + if (lazySkipping) + break; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch, /* lazySkipping*/ 0); +} + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_HcFindBestMatch( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? 
ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls, ms->lazySkipping); + + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ + if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */ + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + assert(curr > matchIndex + dmsIndexDelta); + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + +/* ********************************* +* (SIMD) Row-based matchfinder +***********************************/ +/* Constants for row-based hash */ +#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) +#define ZSTD_ROW_HASH_MAX_ENTRIES 64 /* absolute maximum number of entries per row, for all configurations */ + +#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) + +typedef U64 ZSTD_VecMask; /* Clarifies when we are interacting with a U64 representing a mask of matches */ + +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +MEM_STATIC U32 ZSTD_VecMask_next(ZSTD_VecMask val) { + return ZSTD_countTrailingZeros64(val); +} + +/* ZSTD_row_nextIndex(): + * Returns the next index to insert at within a tagTable row, and updates the "head" + * value to reflect the update. Essentially cycles backwards from [1, {entries per row}) + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { + U32 next = (*tagRow-1) & rowMask; + next += (next == 0) ? rowMask : 0; /* skip first position */ + *tagRow = (BYTE)next; + return next; +} + +/* ZSTD_isAligned(): + * Checks that a pointer is aligned to "align" bytes which must be a power of 2. + */ +MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { + assert((align & (align - 1)) == 0); + return (((size_t)ptr) & (align - 1)) == 0; +} + +/* ZSTD_row_prefetch(): + * Performs prefetching for the hashTable and tagTable at a given row. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* tagTable, U32 const relRow, U32 const rowLog) { + PREFETCH_L1(hashTable + relRow); + if (rowLog >= 5) { + PREFETCH_L1(hashTable + relRow + 16); + /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */ + } + PREFETCH_L1(tagTable + relRow); + if (rowLog == 6) { + PREFETCH_L1(tagTable + relRow + 32); + } + assert(rowLog == 4 || rowLog == 5 || rowLog == 6); + assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ + assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */ +} + +/* ZSTD_row_fillHashCache(): + * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, + * but not beyond iLimit. 
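+ * The cache is a small ring buffer indexed as
+ *   hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK]
+ * so a position idx keeps its slot until it is replaced by the hash of
+ * position idx + ZSTD_ROW_HASH_CACHE_SIZE (see ZSTD_row_nextCachedHash()).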
+ */ +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, + U32 const rowLog, U32 const mls, + U32 idx, const BYTE* const iLimit) +{ + U32 const* const hashTable = ms->hashTable; + BYTE const* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); + U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); + + for (; idx < lim; ++idx) { + U32 const hash = (U32)ZSTD_hashPtrSalted(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); + U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; + } + + DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1], + ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], + ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); +} + +/* ZSTD_row_nextCachedHash(): + * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at + * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. + */ +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, + BYTE const* tagTable, BYTE const* base, + U32 idx, U32 const hashLog, + U32 const rowLog, U32 const mls, + U64 const hashSalt) +{ + U32 const newHash = (U32)ZSTD_hashPtrSalted(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); + U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; + cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash; + return hash; + } +} + +/* ZSTD_row_update_internalImpl(): + * Updates the hash table with positions starting from updateStartIdx until updateEndIdx. + */ +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms, + U32 updateStartIdx, U32 const updateEndIdx, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) +{ + U32* const hashTable = ms->hashTable; + BYTE* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + const BYTE* const base = ms->window.base; + + DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx); + for (; updateStartIdx < updateEndIdx; ++updateStartIdx) { + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls, ms->hashSalt) + : (U32)ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32* const row = hashTable + relRow; + BYTE* tagRow = tagTable + relRow; + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + + assert(hash == ZSTD_hashPtrSalted(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, ms->hashSalt)); + tagRow[pos] = hash & ZSTD_ROW_HASH_TAG_MASK; + row[pos] = updateStartIdx; + } +} + +/* ZSTD_row_update_internal(): + * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate. + * Skips sections of long matches as is necessary. 
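+ * Sketch of the skip logic, using the constants defined in the function
+ * below (kSkipThreshold = 384, start/end windows of 96 and 32 positions):
+ *   if (target - idx > 384) { insert [idx, idx+96); idx = target - 32; }
+ *   insert [idx, target);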
+ */
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+                              U32 const mls, U32 const rowLog,
+                              U32 const rowMask, U32 const useCache)
+{
+    U32 idx = ms->nextToUpdate;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    const U32 kSkipThreshold = 384;
+    const U32 kMaxMatchStartPositionsToUpdate = 96;
+    const U32 kMaxMatchEndPositionsToUpdate = 32;
+
+    if (useCache) {
+        /* Only skip positions when using hash cache, i.e.
+         * if we are loading a dict, don't skip anything.
+         * If we decide to skip, then we only update a set number
+         * of positions at the beginning and end of the match.
+         */
+        if (UNLIKELY(target - idx > kSkipThreshold)) {
+            U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
+            ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache);
+            idx = target - kMaxMatchEndPositionsToUpdate;
+            ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1);
+        }
+    }
+    assert(target >= idx);
+    ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache);
+    ms->nextToUpdate = target;
+}
+
+/* ZSTD_row_update():
+ * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
+ * processing.
+ */
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
+    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
+    const U32 rowMask = (1u << rowLog) - 1;
+    const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
+
+    DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* don't use cache */);
+}
+
+/* Returns the width of the groups of bits that will be set to 1 in the match
+ * mask, one group per row entry. Given that not all architectures have an
+ * easy movemask instruction, this helps to iterate over groups of bits more
+ * easily and faster.
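+ * For example, the NEON path reports a group width of 4 for 16-entry rows:
+ * each row entry then owns 4 adjacent bits of the 64-bit match mask, while
+ * the SSE2 and scalar (SWAR) paths always produce 1-bit groups.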
+ */
+FORCE_INLINE_TEMPLATE U32
+ZSTD_row_matchMaskGroupWidth(const U32 rowEntries)
+{
+    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+    assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
+    (void)rowEntries;
+#if defined(ZSTD_ARCH_ARM_NEON)
+    /* NEON path only works for little endian */
+    if (!MEM_isLittleEndian()) {
+        return 1;
+    }
+    if (rowEntries == 16) {
+        return 4;
+    }
+    if (rowEntries == 32) {
+        return 2;
+    }
+    if (rowEntries == 64) {
+        return 1;
+    }
+#endif
+    return 1;
+}
+
+#if defined(ZSTD_ARCH_X86_SSE2)
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
+{
+    const __m128i comparisonMask = _mm_set1_epi8((char)tag);
+    int matches[4] = {0};
+    int i;
+    assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4);
+    for (i=0; i<nbChunks; i++) {
+        const __m128i chunk = _mm_loadu_si128((const __m128i*)(const void*)(src + 16*i));
+        const __m128i equalMask = _mm_cmpeq_epi8(chunk, comparisonMask);
+        matches[i] = _mm_movemask_epi8(equalMask);
+    }
+    if (nbChunks == 1) return ZSTD_rotateRight_U16((U16)matches[0], head);
+    if (nbChunks == 2) return ZSTD_rotateRight_U32((U32)matches[1] << 16 | (U32)matches[0], head);
+    assert(nbChunks == 4);
+    return ZSTD_rotateRight_U64((U64)matches[3] << 48 | (U64)matches[2] << 32 | (U64)matches[1] << 16 | (U64)matches[0], head);
+}
+#endif
+
+/* Returns a ZSTD_VecMask (U64) that has the nth group (determined by
+ * ZSTD_row_matchMaskGroupWidth) of bits set to 1 if the newest byte in the
+ * corresponding group matches the hash at the nth position in a row of
+ * the tagTable.
+ * Each row is a circular buffer beginning at the value of "headGrouped". So we
+ * must rotate the "matches" bitfield to match up with the actual layout of
+ * the entries within the hashTable */
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGrouped, const U32 rowEntries)
+{
+    const BYTE* const src = tagRow;
+    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+    assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
+    assert(ZSTD_row_matchMaskGroupWidth(rowEntries) * rowEntries <= sizeof(ZSTD_VecMask) * 8);
+
+#if defined(ZSTD_ARCH_X86_SSE2)
+    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, headGrouped);
+#else /* SW or NEON-LE */
+# if defined(ZSTD_ARCH_ARM_NEON)
+    /* This NEON path only works for little endian - otherwise use SWAR below */
+    if (MEM_isLittleEndian()) {
+        return ZSTD_row_getNEONMask(rowEntries, src, tag, headGrouped);
+    }
+# endif /* ZSTD_ARCH_ARM_NEON */
+    /* SWAR */
+    {   const size_t chunkSize = sizeof(size_t);
+        const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
+        const size_t xFF = ~((size_t)0);
+        const size_t x01 = xFF / 0xFF;
+        const size_t x80 = x01 << 7;
+        const size_t splatChar = tag * x01;
+        ZSTD_VecMask matches = 0;
+        int i = rowEntries - chunkSize;
+        assert((sizeof(size_t) == 4) || (sizeof(size_t) == 8));
+        if (MEM_isLittleEndian()) { /* runtime check so have two loops */
+            const size_t extractMagic = (xFF / 0x7F) >> chunkSize;
+            do {
+                size_t chunk = MEM_readST(&src[i]);
+                chunk ^= splatChar;
+                chunk = (((chunk | x80) - x01) | chunk) & x80;
+                matches <<= chunkSize;
+                matches |= (chunk * extractMagic) >> shiftAmount;
+                i -= chunkSize;
+            } while (i >= 0);
+        } else { /* big endian: reverse bits during extraction */
+            const size_t msb = xFF ^ (xFF >> 1);
+            const size_t extractMagic = (msb / 0x1FF) | msb;
+            do {
+                size_t chunk = MEM_readST(&src[i]);
+                chunk ^= splatChar;
+                chunk = (((chunk | x80) - x01) | chunk) & x80;
+                matches <<= chunkSize;
+                matches |= ((chunk >> 7) * extractMagic) >> shiftAmount;
+                i -= chunkSize;
+            } while (i >= 0);
+        }
+        matches = ~matches;
+        if (rowEntries == 16) {
+            return ZSTD_rotateRight_U16((U16)matches, headGrouped);
+        } else if (rowEntries == 32) {
+            return ZSTD_rotateRight_U32((U32)matches, headGrouped);
+        } else {
+            return ZSTD_rotateRight_U64((U64)matches, headGrouped);
+        }
+    }
+#endif
+}
+
+/* The high-level approach of the SIMD row based match finder is as follows:
+ * - Figure out where to insert the new entry:
+ *      - Generate a hash for the current input position and split it into a one-byte tag and `rowHashLog` bits of index.
+ *      - The hash is salted by a value that changes on every context reset, so when the same table is used
+ *        we will avoid collisions that would otherwise slow us down by introducing phantom matches.
+ *      - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
+ *        which row to insert into.
+ *      - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
+ *        be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
+ *        per row).
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
+ *   generate a bitfield that we can cycle through to check the collisions in the hash table.
+ * - Pick the longest match.
+ * - Insert the tag into the equivalent row and position in the tagTable.
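+ *
+ * As a concrete sketch of the first step, mirroring the expressions used in
+ * ZSTD_RowFindBestMatch() below:
+ *   hash   = ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, salt);
+ *   relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;  row within the tables
+ *   tag    = hash & ZSTD_ROW_HASH_TAG_MASK;               one-byte tag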
+ */
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_RowFindBestMatch(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode,
+                        const U32 rowLog)
+{
+    U32* const hashTable = ms->hashTable;
+    BYTE* const tagTable = ms->tagTable;
+    U32* const hashCache = ms->hashCache;
+    const U32 hashLog = ms->rowHashLog;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 curr = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 rowEntries = (1U << rowLog);
+    const U32 rowMask = rowEntries - 1;
+    const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
+    const U32 groupWidth = ZSTD_row_matchMaskGroupWidth(rowEntries);
+    const U64 hashSalt = ms->hashSalt;
+    U32 nbAttempts = 1U << cappedSearchLog;
+    size_t ml=4-1;
+    U32 hash;
+
+    /* DMS/DDS variables that may be referenced later */
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+
+    /* Initialize the following variables to satisfy static analyzer */
+    size_t ddsIdx = 0;
+    U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
+    U32 dmsTag = 0;
+    U32* dmsRow = NULL;
+    BYTE* dmsTagRow = NULL;
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+        {   /* Prefetch DDS hashtable entry */
+            ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
+            PREFETCH_L1(&dms->hashTable[ddsIdx]);
+        }
+        ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
+    }
+
+    if (dictMode == ZSTD_dictMatchState) {
+        /* Prefetch DMS rows */
+        U32* const dmsHashTable = dms->hashTable;
+        BYTE* const dmsTagTable = dms->tagTable;
+        U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
+        dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
+        dmsRow = dmsHashTable + dmsRelRow;
+        ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
+    }
+
+    /* Update the hashTable and tagTable up to (but not including) ip */
+    if (!ms->lazySkipping) {
+        ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+        hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls, hashSalt);
+    } else {
+        /* Stop inserting every position when in the lazy skipping mode.
+         * The hash cache is also not kept up to date in this mode.
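+         * Instead, the hash for ip is recomputed directly below with
+         * ZSTD_hashPtrSalted(), and nextToUpdate is pinned to curr so that
+         * the next non-skipping search resumes insertion from here.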
+ */ + hash = (U32)ZSTD_hashPtrSalted(ip, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls, hashSalt); + ms->nextToUpdate = curr; + } + ms->hashSaltEntropy += hash; /* collect salt entropy */ + + { /* Get the hash for ip, compute the appropriate row */ + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); + U32 const headGrouped = (*tagRow & rowMask) * groupWidth; + U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, headGrouped, rowEntries); + + /* Cycle through the matches and prefetch */ + for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { + U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; + U32 const matchIndex = row[matchPos]; + if(matchPos == 0) continue; + assert(numMatches < rowEntries); + if (matchIndex < lowLimit) + break; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + PREFETCH_L1(base + matchIndex); + } else { + PREFETCH_L1(dictBase + matchIndex); + } + matchBuffer[numMatches++] = matchIndex; + --nbAttempts; + } + + /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop + in ZSTD_row_update_internal() at the next search. */ + { + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + tagRow[pos] = (BYTE)tag; + row[pos] = ms->nextToUpdate++; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex < curr); + assert(matchIndex >= lowLimit); + + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + /* read 4B starting from (match + ml + 1 - sizeof(U32)) */ + if (MEM_read32(match + ml - 3) == MEM_read32(ip + ml - 3)) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* Save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = OFFSET_TO_OFFBASE(curr - matchIndex); + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + + assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. 
*/ + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + /* TODO: Measure and potentially add prefetching to DMS */ + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + + { U32 const headGrouped = (*dmsTagRow & rowMask) * groupWidth; + U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, headGrouped, rowEntries); + + for (; (matches > 0) && (nbAttempts > 0); matches &= (matches - 1)) { + U32 const matchPos = ((headGrouped + ZSTD_VecMask_next(matches)) / groupWidth) & rowMask; + U32 const matchIndex = dmsRow[matchPos]; + if(matchPos == 0) continue; + if (matchIndex < dmsLowestIndex) + break; + PREFETCH_L1(dmsBase + matchIndex); + matchBuffer[numMatches++] = matchIndex; + --nbAttempts; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex >= dmsLowestIndex); + assert(matchIndex < curr); + + { const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + } + + if (currentMl > ml) { + ml = currentMl; + assert(curr > matchIndex + dmsIndexDelta); + *offsetPtr = OFFSET_TO_OFFBASE(curr - (matchIndex + dmsIndexDelta)); + if (ip+currentMl == iLimit) break; + } + } + } + } + return ml; +} + + +/** + * Generate search functions templated on (dictMode, mls, rowLog). + * These functions are outlined for code size & compilation time. + * ZSTD_searchMax() dispatches to the correct implementation function. + * + * TODO: The start of the search function involves loading and calculating a + * bunch of constants from the ZSTD_matchState_t. These computations could be + * done in an initialization function, and saved somewhere in the match state. + * Then we could pass a pointer to the saved state instead of the match state, + * and avoid duplicate computations. + * + * TODO: Move the match re-winding into searchMax. This improves compression + * ratio, and unlocks further simplifications with the next TODO. + * + * TODO: Try moving the repcode search into searchMax. After the re-winding + * and repcode search are in searchMax, there is no more logic in the match + * finder loop that requires knowledge about the dictMode. So we should be + * able to avoid force inlining it, and we can join the extDict loop with + * the single segment loop. It should go in searchMax instead of its own + * function to avoid having multiple virtual function calls per search. 
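+ *
+ * As an illustration of the token-pasting performed by the macros below,
+ * the generated names look like (examples only):
+ *   ZSTD_BT_SEARCH_FN(dictMatchState, 4)  -> ZSTD_BtFindBestMatch_dictMatchState_4
+ *   ZSTD_ROW_SEARCH_FN(noDict, 5, 6)      -> ZSTD_RowFindBestMatch_noDict_5_6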
+ */ + +#define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls +#define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls +#define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog + +#define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE + +#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offBasePtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \ + } \ + +#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offsetPtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \ + } \ + +#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)( \ + ZSTD_matchState_t* ms, \ + const BYTE* ip, const BYTE* const iLimit, \ + size_t* offsetPtr) \ + { \ + assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls); \ + assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog); \ + return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \ + } \ + +#define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \ + X(dictMode, mls, 4) \ + X(dictMode, mls, 5) \ + X(dictMode, mls, 6) + +#define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5) \ + ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6) + +#define ZSTD_FOR_EACH_MLS(X, dictMode) \ + X(dictMode, 4) \ + X(dictMode, 5) \ + X(dictMode, 6) + +#define ZSTD_FOR_EACH_DICT_MODE(X, ...) 
\ + X(__VA_ARGS__, noDict) \ + X(__VA_ARGS__, extDict) \ + X(__VA_ARGS__, dictMatchState) \ + X(__VA_ARGS__, dedicatedDictSearch) + +/* Generate row search fns for each combination of (dictMode, mls, rowLog) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN) +/* Generate binary Tree search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN) +/* Generate hash chain search fns for each combination of (dictMode, mls) */ +ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN) + +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; + +#define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls) \ + case mls: \ + return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr); +#define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog) \ + case rowLog: \ + return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr); + +#define ZSTD_SWITCH_MLS(X, dictMode) \ + switch (mls) { \ + ZSTD_FOR_EACH_MLS(X, dictMode) \ + } + +#define ZSTD_SWITCH_ROWLOG(dictMode, mls) \ + case mls: \ + switch (rowLog) { \ + ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \ + } \ + ZSTD_UNREACHABLE; \ + break; + +#define ZSTD_SWITCH_SEARCH_METHOD(dictMode) \ + switch (searchMethod) { \ + case search_hashChain: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \ + break; \ + case search_binaryTree: \ + ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \ + break; \ + case search_rowHash: \ + ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode) \ + break; \ + } \ + ZSTD_UNREACHABLE; + +/** + * Searches for the longest match at @p ip. + * Dispatches to the correct implementation function based on the + * (searchMethod, dictMode, mls, rowLog). We use switch statements + * here instead of using an indirect function call through a function + * pointer because after Spectre and Meltdown mitigations, indirect + * function calls can be very costly, especially in the kernel. + * + * NOTE: dictMode and searchMethod should be templated, so those switch + * statements should be optimized out. Only the mls & rowLog switches + * should be left. + * + * @param ms The match state. + * @param ip The position to search at. + * @param iend The end of the input data. + * @param[out] offsetPtr Stores the match offset into this pointer. + * @param mls The minimum search length, in the range [4, 6]. + * @param rowLog The row log (if applicable), in the range [4, 6]. + * @param searchMethod The search method to use (templated). + * @param dictMode The dictMode (templated). + * + * @returns The length of the longest match found, or < mls if no match is found. + * If a match is found its offset is stored in @p offsetPtr. 
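+ *
+ * A minimal call sketch, mirroring the call sites in the parsers below
+ * (illustrative only; offsetPtr receives an OFFSET_TO_OFFBASE()-encoded
+ * value, not a raw distance):
+ *   size_t offBase = 999999999;  (an impossibly large init value)
+ *   size_t const ml = ZSTD_searchMax(ms, ip, iend, &offBase, mls, rowLog,
+ *                                    searchMethod, dictMode);
+ * A match was found if ml >= 4; its distance is OFFBASE_TO_OFFSET(offBase).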
+ */
+FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
+    ZSTD_matchState_t* ms,
+    const BYTE* ip,
+    const BYTE* iend,
+    size_t* offsetPtr,
+    U32 const mls,
+    U32 const rowLog,
+    searchMethod_e const searchMethod,
+    ZSTD_dictMode_e const dictMode)
+{
+    if (dictMode == ZSTD_noDict) {
+        ZSTD_SWITCH_SEARCH_METHOD(noDict)
+    } else if (dictMode == ZSTD_extDict) {
+        ZSTD_SWITCH_SEARCH_METHOD(extDict)
+    } else if (dictMode == ZSTD_dictMatchState) {
+        ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
+    } else if (dictMode == ZSTD_dedicatedDictSearch) {
+        ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
+    }
+    ZSTD_UNREACHABLE;
+    return 0;
+}
+
+/* *******************************
+*  Common parser - lazy strategy
+*********************************/
+
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_lazy_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth,
+                        ZSTD_dictMode_e const dictMode)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 prefixLowestIndex = ms->window.dictLimit;
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
+
+    U32 offset_1 = rep[0], offset_2 = rep[1];
+    U32 offsetSaved1 = 0, offsetSaved2 = 0;
+
+    const int isDMS = dictMode == ZSTD_dictMatchState;
+    const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
+    const int isDxS = isDMS || isDDS;
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+    const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
+    const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
+    const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
+    const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
+    const U32 dictIndexDelta = isDxS ?
+                               prefixLowestIndex - (U32)(dictEnd - dictBase) :
+                               0;
+    const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
+    ip += (dictAndPrefixLength == 0);
+    if (dictMode == ZSTD_noDict) {
+        U32 const curr = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
+    }
+    if (isDxS) {
+        /* dictMatchState repCode checks don't currently handle repCode == 0
+         * disabling. */
+        assert(offset_1 <= dictAndPrefixLength);
+        assert(offset_2 <= dictAndPrefixLength);
+    }
+
+    /* Reset the lazy skipping state */
+    ms->lazySkipping = 0;
+
+    if (searchMethod == search_rowHash) {
+        ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit);
+    }
+
+    /* Match Loop */
+#if defined(__GNUC__) && defined(__x86_64__)
+    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed. To fix the instability, align the loop on 32 bytes.
+ */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offBase = REPCODE1_TO_OFFBASE;
+        const BYTE* start=ip+1;
+        DEBUGLOG(7, "search baseline (depth 0)");
+
+        /* check repCode */
+        if (isDxS) {
+            const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
+            const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
+                                && repIndex < prefixLowestIndex) ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+            if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
+                const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                if (depth==0) goto _storeSequence;
+            }
+        }
+        if ( dictMode == ZSTD_noDict
+          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
+            matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+            if (depth==0) goto _storeSequence;
+        }
+
+        /* first search (depth 0) */
+        {   size_t offbaseFound = 999999999;
+            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offbaseFound, mls, rowLog, searchMethod, dictMode);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offBase = offbaseFound;
+        }
+
+        if (matchLength < 4) {
+            size_t const step = ((size_t)(ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            ip += step;
+            /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time.
+             * In this mode we stop inserting every position into our tables, and only insert
+             * positions that we search, which is one in step positions.
+             * The exact cutoff is flexible, I've just chosen a number that is reasonably high,
+             * so we minimize the compression ratio loss in "normal" scenarios. This mode gets
+             * triggered once we've gone 2KB without finding any matches.
+             */
+            ms->lazySkipping = step > kLazySkippingStep;
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            DEBUGLOG(7, "search depth 1");
+            ip ++;
+            if ( (dictMode == ZSTD_noDict)
+              && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                int const gain2 = (int)(mlRep * 3);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
+                if ((mlRep >= 4) && (gain2 > gain1))
+                    matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+            }
+            if (isDxS) {
+                const U32 repIndex = (U32)(ip - base) - offset_1;
+                const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                               dictBase + (repIndex - dictIndexDelta) :
+                               base + repIndex;
+                if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                    const BYTE* repMatchEnd = repIndex < prefixLowestIndex ?
dictEnd : iend;
+                    size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                    int const gain2 = (int)(mlRep * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+                }
+            }
+            {   size_t ofbCandidate=999999999;
+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offBase = ofbCandidate, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                DEBUGLOG(7, "search depth 2");
+                ip ++;
+                if ( (dictMode == ZSTD_noDict)
+                  && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                    size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
+                    int const gain2 = (int)(mlRep * 4);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
+                    if ((mlRep >= 4) && (gain2 > gain1))
+                        matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+                }
+                if (isDxS) {
+                    const U32 repIndex = (U32)(ip - base) - offset_1;
+                    const BYTE* repMatch = repIndex < prefixLowestIndex ?
+                                   dictBase + (repIndex - dictIndexDelta) :
+                                   base + repIndex;
+                    if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+                        && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
+                        const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
+                        size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
+                        int const gain2 = (int)(mlRep * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1);
+                        if ((mlRep >= 4) && (gain2 > gain1))
+                            matchLength = mlRep, offBase = REPCODE1_TO_OFFBASE, start = ip;
+                    }
+                }
+                {   size_t ofbCandidate=999999999;
+                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, dictMode);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offBase = ofbCandidate, start = ip;
+                        continue;
+            }   }   }
+            break;  /* nothing found : store previous solution */
+        }
+
+        /* NOTE:
+         * Pay attention that `start[-value]` can lead to strange undefined behavior
+         * notably if `value` is unsigned, resulting in a large positive `-value`.
+         */
+        /* catch up */
+        if (OFFBASE_IS_OFFSET(offBase)) {
+            if (dictMode == ZSTD_noDict) {
+                while ( ((start > anchor) & (start - OFFBASE_TO_OFFSET(offBase) > prefixLowest))
+                     && (start[-1] == (start-OFFBASE_TO_OFFSET(offBase))[-1]) )  /* only search for offset within prefix */
+                    { start--; matchLength++; }
+            }
+            if (isDxS) {
+                U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase));
+                const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
+                const BYTE* const mStart = (matchIndex < prefixLowestIndex) ?
dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = (size_t)(start - anchor); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); + anchor = ip = start + matchLength; + } + if (ms->lazySkipping) { + /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); + } + ms->lazySkipping = 0; + } + + /* check immediate repcode */ + if (isDxS) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0), + * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */ + offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2; + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved1; + rep[1] = offset_2 ? 
offset_2 : offsetSaved2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} +#endif /* build exclusions */ + + +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); +} +#endif + +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); +} +#endif + 
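+/* Note on the wrappers above and below: they differ only in the template
+ * arguments handed to ZSTD_compressBlock_lazy_generic(). The `depth`
+ * argument selects the parser strength: 0 = greedy, 1 = lazy, 2 = lazy2
+ * (and btlazy2, which also switches to the binary-tree matchfinder).
+ * For instance:
+ *   ZSTD_compressBlock_greedy_row() -> lazy_generic(..., search_rowHash, 0, ZSTD_noDict)
+ *   ZSTD_compressBlock_lazy2()      -> lazy_generic(..., search_hashChain, 2, ZSTD_noDict)
+ */
+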
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); +} +#endif + +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +} +#endif + +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = searchMethod == search_rowHash ? 
iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; + const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6); + const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6); + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); + + /* Reset the lazy skipping state */ + ms->lazySkipping = 0; + + /* init */ + ip += (ip == prefixStart); + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); + } + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability, align the loop on a 32-byte boundary. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offBase = REPCODE1_TO_OFFBASE; + const BYTE* start=ip+1; + U32 curr = (U32)(ip-base); + + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t ofbCandidate = 999999999; + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offBase = ofbCandidate; + } + + if (matchLength < 4) { + size_t const step = ((size_t)(ip-anchor) >> kSearchStrength); + ip += step + 1; /* jump faster over incompressible sections */ + /* Enter the lazy skipping mode once we are skipping more than 8 bytes at a time. + * In this mode we stop inserting every position into our tables, and only insert + * positions that we search, which is one in step positions. + * The exact cutoff is flexible, I've just chosen a number that is reasonably high, + * so we minimize the compression ratio loss in "normal" scenarios. This mode gets + * triggered once we've gone 2KB without finding any matches. + */ + ms->lazySkipping = step > kLazySkippingStep; + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip < ilimit) { + DEBUGLOG(7, "search depth 1"); + ip ++; + curr++; + /* check repCode */ + if (offBase) { + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); + const U32 repIndex = (U32)(curr - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ?
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offBase) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; + } } + + /* search match, depth 1 */ + { size_t ofbCandidate = 999999999; + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offBase = ofbCandidate, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip < ilimit)) { + DEBUGLOG(7, "search depth 2"); + ip ++; + curr++; + /* check repCode */ + if (offBase) { + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog); + const U32 repIndex = (U32)(curr - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offBase = REPCODE1_TO_OFFBASE, start = ip; + } } + + /* search match, depth 2 */ + { size_t ofbCandidate = 999999999; + size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &ofbCandidate, mls, rowLog, searchMethod, ZSTD_extDict); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)ofbCandidate)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offBase) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offBase = ofbCandidate, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (OFFBASE_IS_OFFSET(offBase)) { + U32 const matchIndex = (U32)((size_t)(start-base) - OFFBASE_TO_OFFSET(offBase)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)OFFBASE_TO_OFFSET(offBase); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = (size_t)(start - anchor); + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offBase, matchLength); + anchor = ip = start + matchLength; + } + if (ms->lazySkipping) { + /* We've found a match, disable lazy skipping mode, and refill the hash cache. */ + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, mls, ms->nextToUpdate, ilimit); + } + ms->lazySkipping = 0; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ?
dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offBase = offset_2; offset_2 = offset_1; offset_1 = (U32)offBase; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, matchLength); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} +#endif /* build exclusions */ + +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); +} + +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); +} +#endif + +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); +} + +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); +} +#endif + +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); +} + +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); +} +#endif + +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.h new file mode 100644 index 0000000..3635813 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_lazy.h @@ -0,0 +1,202 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" + +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. + */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + +#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ + +#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ +#endif + +#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy +#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row +#else +#define ZSTD_COMPRESSBLOCK_GREEDY NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL +#define 
ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL +#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL +#endif + +#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy +#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY NULL +#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL +#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL +#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL +#endif + +#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 
rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2 +#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row +#else +#define ZSTD_COMPRESSBLOCK_LAZY2 NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL +#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL +#endif + +#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2 +#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState +#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict +#else +#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL +#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.c new file mode 100644 index 0000000..17c069f --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.c @@ -0,0 +1,730 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#include "zstd_ldm.h" + +#include "../common/debug.h" +#include "../common/xxhash.h" +#include "zstd_fast.h" /* ZSTD_fillHashTable() */ +#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ +#include "zstd_ldm_geartab.h" + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. + * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_reset() + * Feeds [data, data + minMatchLength) into the hash without registering any + * splits. This effectively resets the hash state. This is used when skipping + * over data, either at the beginning of a block, or skipping sections. + */ +static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, + BYTE const* data, size_t minMatchLength) +{ + U64 hash = state->rolling; + size_t n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + } while (0) + while (n + 3 < minMatchLength) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < minMatchLength) { + GEAR_ITER_ONCE(); + } +#undef GEAR_ITER_ONCE +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. 
*/ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); + return params.enableLdm == ZSTD_ps_enable ? totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); + +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. 
*/ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. + * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. */ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); + break; + + case ZSTD_dfast: +#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx); +#else + assert(0); /* shouldn't be called: cparams should've been adjusted. 
*/ +#endif + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +void ZSTD_ldm_fillHashTable( + ldmState_t* ldmState, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash >> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; + } +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position closer to anchor + * if it is far away + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + curr - MIN(512, curr - ms->nextToUpdate - 1024); + } +} + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U32 const entsPerBucket = 1U << params->bucketSizeLog; + U32 const hBits = params->hashLog - params->bucketSizeLog; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ?
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - HASH_READ_SIZE; + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + ZSTD_ldm_gear_init(&hashState, params); + ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); + ip += minMatchLength; + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); + } + + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + U32 offset; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(split, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); + } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + /* Match found */ + offset = (U32)(split - base) - bestEntry->offset; + mLength = forwardMatchLength + backwardMatchLength; + { + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + + anchor = split + forwardMatchLength; + + /* If we find a match that ends after the data that we've hashed + * then we have a repeating, overlapping pattern. E.g. all zeros. + * If one repetition of the pattern matches our `stopMask` then all + * repetitions will. We don't need to insert them all into our table, + * only the first one. So skip over overlapping matches. + * This is a major speed boost (20x) for compressing a single byte + * repeated, when that byte ends up in the table. + */ + if (anchor > ip + hashed) { + ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); + /* Continue the outer loop at anchor (ip + hashed == anchor). */ + ip = anchor - hashed; + break; + } + } + + ip += hashed; + } + + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function.
+ */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. 
+ */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void +ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) +{ + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. + */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. 
*/ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_paramSwitch_e useRowMatchFinder, + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the literals */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + /* End signal */ + if (sequence.offset == 0) + break; + + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + int i; + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + OFFSET_TO_OFFBASE(sequence.offset), + sequence.matchLength); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.h new file mode 100644 index 0000000..f147021 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm.h @@ -0,0 +1,117 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" /* ldmParams_t, U32 */ +#include "../zstd.h" /* ZSTD_CCtx, size_t */ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. + * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_paramSwitch_e useRowMatchFinder, + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + +/* ZSTD_ldm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. + * Not to be used in conjunction with ZSTD_ldm_skipSequences(). + * Must be called for data which is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled.
+ */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. + */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). + * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LDM_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm_geartab.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm_geartab.h new file mode 100644 index 0000000..ef34bc5 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_ldm_geartab.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +#include "../common/compiler.h" /* UNUSED_ATTR */ +#include "../common/mem.h" /* U64 */ + +static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7,
0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.c 
b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.c new file mode 100644 index 0000000..e63073e --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.c @@ -0,0 +1,1576 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "hist.h" +#include "zstd_opt.h" + +#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) + +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ +#define ZSTD_MAX_PRICE (1<<30) + +#define ZSTD_PREDEF_THRESHOLD 8 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + + +/*-************************************* +* Price functions for optimal parser +***************************************/ + +#if 0 /* approximation at bit level (for tests) */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat, opt) ((void)(opt), ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy (for tests) */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((void)(opt), ZSTD_fracWeight(stat)) +#else /* opt==approx, ultra==accurate */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((opt) ? 
ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+/* ZSTD_bitWeight() :
+ * provide estimated "cost" of a stat in full bits only */
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+/* ZSTD_fracWeight() :
+ * provide fractional-bit "cost" of a stat,
+ * using linear interpolation approximation */
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    /* Fweight was meant for "Fractional weight"
+     * but it's effectively a value between 1 and 2
+     * using fixed point arithmetic */
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
+
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
+MEM_STATIC double ZSTD_fCost(int price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+#endif
+
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_ps_disable;
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+static U32 sum_u32(const unsigned table[], size_t nbElts)
+{
+    size_t n;
+    U32 total = 0;
+    for (n=0; n<nbElts; n++) {
+        total += table[n];
+    }
+    return total;
+}
+
+typedef enum { base_0possible=0, base_1guaranteed=1 } base_directive_e;
+
+static U32
+ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift, base_directive_e base1)
+{
+    U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
+    assert(shift < 30);
+    for (s=0; s<lastEltIndex+1; s++) {
+        unsigned const base = base1 ? 1 : (table[s]>0);
+        unsigned const newStat = base + (table[s] >> shift);
+        sum += newStat;
+        table[s] = newStat;
+    }
+    return sum;
+}
+
+/* ZSTD_scaleStats() :
+ * reduce all elt frequencies in table if sum too large
+ * return the resulting sum of elements */
+static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
+{
+    U32 const prevsum = sum_u32(table, lastEltIndex+1);
+    U32 const factor = prevsum >> logTarget;
+    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
+    assert(logTarget < 30);
+    if (factor <= 1) return prevsum;
+    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor), base_1guaranteed);
+}
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *   take hints from dictionary if there is one
+ *   and init from zero if there is none,
+ *   using src for literals stats, and baseline stats for sequence symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
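
The fixed-point weights above are easier to see with numbers. The following is a standalone editor's sketch, not part of the imported zstd sources: highbit32_demo, ACCURACY_DEMO and MULTIPLIER_DEMO are local stand-ins for ZSTD_highbit32, BITCOST_ACCURACY and BITCOST_MULTIPLIER.

    #include <stdio.h>
    #include <stdint.h>

    #define ACCURACY_DEMO   8                       /* stands in for BITCOST_ACCURACY */
    #define MULTIPLIER_DEMO (1 << ACCURACY_DEMO)    /* stands in for BITCOST_MULTIPLIER */

    /* position of the highest set bit, as ZSTD_highbit32() would return */
    static uint32_t highbit32_demo(uint32_t v)
    {
        uint32_t r = 0;
        while (v >>= 1) r++;
        return r;
    }

    /* same arithmetic as ZSTD_fracWeight(): whole bits plus a 1.x fractional part */
    static uint32_t fracWeight_demo(uint32_t rawStat)
    {
        uint32_t const stat = rawStat + 1;
        uint32_t const hb = highbit32_demo(stat);
        uint32_t const bWeight = hb * MULTIPLIER_DEMO;           /* integer bit count */
        uint32_t const fWeight = (stat << ACCURACY_DEMO) >> hb;  /* 1.0..2.0, fixed point */
        return bWeight + fWeight;
    }

    int main(void)
    {
        /* price of a symbol seen 100 times out of a total of 1000:
         * weight(total) - weight(freq), in 1/256ths of a bit */
        uint32_t const price = fracWeight_demo(1000) - fracWeight_demo(100);
        printf("~%.2f bits\n", (double)price / MULTIPLIER_DEMO);  /* ~3.38; exact log2(1000/100) = 3.32 */
        return 0;
    }

The interpolation keeps prices monotonic in the frequency while staying close to the true log2 cost, without a floating-point log2 in the hot path.
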
+ */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* no literals stats collected -> first block assumed -> init */ + + /* heuristic: use pre-defined stats for too small inputs */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { + DEBUGLOG(5, "srcSize <= %i : use predefined stats", ZSTD_PREDEF_THRESHOLD); + optPtr->priceType = zop_predef; + } + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + + /* huffman stats covering the full value set : table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + if (compressedLiterals) { + /* generate literals statistics from huffman table */ + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* first block, no dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + /* base initial cost of literals on direct frequency within src */ + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8, base_0possible); + } + + { unsigned const baseLLfreqs[MaxLL+1] = { + 4, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1 + }; + ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs)); + optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1); + } + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned const baseOFCfreqs[MaxOff+1] = { + 6, 2, 1, 1, 2, 3, 4, 4, + 4, 3, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 + }; + ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs)); + optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1); + } + + } + + } else { /* new block : scale down accumulated statistics */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12); + optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11); + optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11); + optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). + * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength); + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = optPtr->litSumBasePrice * litLength; + U32 const litPriceMax = optPtr->litSumBasePrice - BITCOST_MULTIPLIER; + U32 u; + assert(optPtr->litSumBasePrice >= BITCOST_MULTIPLIER); + for (u=0; u < litLength; u++) { + U32 litPrice = WEIGHT(optPtr->litFreq[literals[u]], optLevel); + if (UNLIKELY(litPrice > litPriceMax)) litPrice = litPriceMax; + price -= litPrice; + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + assert(litLength <= ZSTD_BLOCKSIZE_MAX); + if (optPtr->priceType == zop_predef) + return WEIGHT(litLength, optLevel); + + /* ZSTD_LLcode() can't compute litLength price for sizes >= ZSTD_BLOCKSIZE_MAX + * because it isn't representable in the zstd format. + * So instead just pretend it would cost 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. + * In such a case, the block would be all literals. 
+ */ + if (litLength == ZSTD_BLOCKSIZE_MAX) + return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence. + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. + * @offBase : sumtype, representing an offset or a repcode, and using numeric representation of ZSTD_storeSeq() + * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) + */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offBase, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offBase); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, does not use statistics */ + return WEIGHT(mlBase, optLevel) + + ((16 + offCode) * BITCOST_MULTIPLIER); /* emulated offset cost */ + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLength <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offBase, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* offset code : follows storeSeq() numeric representation */ + { U32 const offCode = ZSTD_highbit32(offBase); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. 
not within extDict) */ +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + *nextToUpdate3 = target; + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * @param ip assumed <= iend-8 . + * @param target The target of ZSTD_updateTree_internal() - we are filling to this position + * @return : nb of positions added */ +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_insertBt1( + const ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const target, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 curr = (U32)(ip-base); + const U32 btLow = btMask >= curr ? 0 : curr - btMask; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + /* windowLow is based on target because + * we only need positions that will be in the window at the end of the tree update. 
+ */ + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog); + U32 matchEndIdx = curr+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + + assert(curr <= target); + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = curr; /* Update Hash Table */ + + assert(windowLow > 0); + for (; nbCompares && (matchIndex >= windowLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; 
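
ZSTD_insertBt1() stores its binary tree inside the chain table: position curr owns the adjacent pair bt[2*(curr&btMask)] / bt[2*(curr&btMask)+1], holding the indices of its closest smaller and larger neighbours, and smallerPtr/largerPtr re-link those slots while the loop descends. Below is a minimal sketch of the same two-slots-per-node layout, using a plain insert plus an in-order walk instead of the combined search-and-relink above; all names here are hypothetical, not zstd's.

    #include <stdio.h>

    #define NB 16
    static int      keys[NB];      /* key of node i (node ids are 1-based; 0 means "none") */
    static unsigned tree[2*NB];    /* tree[2*i] = smaller child, tree[2*i+1] = larger child */
    static unsigned root = 0;

    static void insert_demo(unsigned id)
    {
        unsigned* slot = &root;
        while (*slot) {
            unsigned const node = *slot;
            /* descend into the smaller or larger slot of the current node */
            slot = (keys[id] < keys[node]) ? &tree[2*node] : &tree[2*node + 1];
        }
        *slot = id;    /* link the new node where the search fell off the tree */
    }

    static void inorder_demo(unsigned node)
    {
        if (!node) return;
        inorder_demo(tree[2*node]);
        printf("%d ", keys[node]);
        inorder_demo(tree[2*node + 1]);
    }

    int main(void)
    {
        int const vals[5] = { 42, 7, 99, 23, 57 };
        unsigned i;
        for (i = 0; i < 5; i++) { keys[i+1] = vals[i]; insert_demo(i+1); }
        inorder_demo(root);    /* prints: 7 23 42 57 99 */
        printf("\n");
        return 0;
    }

In the real match finder the "key" is the suffix starting at each position, so the in-order relation is lexicographic, and insertion and best-match search happen in the same descent.
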
+ { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > curr + 8); + return MAX(positions, matchEndIdx - (curr + 8)); + } +} + +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); + + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 +ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip, const BYTE* const iLimit, + const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + const U32 ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + const U32 lengthToBeat, + const U32 mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const matchLow = windowLow ? windowLow : 1; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? 
windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); + + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = curr - repOffset; + U32 repLen = 0; + assert(curr >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= curr */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? + dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(curr >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = REPCODE_TO_OFFBASE(repCode - ll0 + 1); /* expect value between 1 and 3 */ + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); + if ((matchIndex3 >= matchLow) + & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* 
const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(curr > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = OFFSET_TO_OFFBASE(curr - matchIndex3); + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = curr+1; /* skip insertion */ + return 1; + } } } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } /* if (mls == 3) */ + + hashTable[h] = curr; /* Update Hash Table */ + + for (; nbCompares && (matchIndex >= matchLow); --nbCompares) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(curr > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] read */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offBase=%u)", + (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + assert(nbCompares <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. 
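
matches[].off above holds the offBase "sumtype": per the surrounding code, REPCODE_TO_OFFBASE() yields values 1..3 and OFFSET_TO_OFFBASE() shifts real distances above them. A standalone sketch of that convention follows; the _DEMO macro bodies are a paraphrase assuming ZSTD_REP_NUM == 3, not copies of the real macros.

    #include <stdio.h>
    #include <assert.h>

    #define REP_NUM_DEMO 3
    #define OFFSET_TO_OFFBASE_DEMO(o)   ((o) + REP_NUM_DEMO)   /* real offset, o > 0 */
    #define REPCODE_TO_OFFBASE_DEMO(r)  (r)                    /* repcode, 1..3 */
    #define OFFBASE_IS_REPCODE_DEMO(ob) ((ob) <= REP_NUM_DEMO)

    int main(void)
    {
        unsigned const fromOffset = OFFSET_TO_OFFBASE_DEMO(1024);  /* -> 1027 */
        unsigned const fromRep    = REPCODE_TO_OFFBASE_DEMO(2);    /* -> 2 */
        assert(!OFFBASE_IS_REPCODE_DEMO(fromOffset));
        assert(OFFBASE_IS_REPCODE_DEMO(fromRep));
        printf("offset 1024 -> offBase %u, repcode 2 -> offBase %u\n", fromOffset, fromRep);
        return 0;
    }

Packing both cases into one U32 lets the parser price and compare repcode and explicit-offset candidates in a single matches[] table.
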
*/ + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + for (; nbCompares && (dictMatchIndex > dmsLowLimit); --nbCompares) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offBase=%u)", + (U32)matchLength, curr - matchIndex, OFFSET_TO_OFFBASE(curr - matchIndex)); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = OFFSET_TO_OFFBASE(curr - matchIndex); + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } } } /* if (dictMode == ZSTD_dictMatchState) */ + + assert(matchEndIdx > curr+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + +typedef U32 (*ZSTD_getAllMatchesFn)( + ZSTD_match_t*, + ZSTD_matchState_t*, + U32*, + const BYTE*, + const BYTE*, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat); + +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +U32 ZSTD_btGetAllMatches_internal( + ZSTD_match_t* matches, + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, + const BYTE* const iHighLimit, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat, + const ZSTD_dictMode_e dictMode, + const U32 mls) +{ + assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls); + DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls); + if (ip < ms->window.base + ms->nextToUpdate) + return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode); + return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls); +} + +#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls + +#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls) \ + static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)( \ + ZSTD_match_t* matches, \ + ZSTD_matchState_t* ms, \ + U32* nextToUpdate3, \ + const BYTE* ip, \ + const BYTE* const iHighLimit, \ + const U32 rep[ZSTD_REP_NUM], \ + U32 const ll0, \ + U32 const lengthToBeat) \ + { \ + return ZSTD_btGetAllMatches_internal( \ + matches, ms, nextToUpdate3, ip, iHighLimit, \ + 
rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \ + } + +#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5) \ + GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6) + +GEN_ZSTD_BT_GET_ALL_MATCHES(noDict) +GEN_ZSTD_BT_GET_ALL_MATCHES(extDict) +GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState) + +#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode) \ + { \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \ + ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6) \ + } + +static ZSTD_getAllMatchesFn +ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode) +{ + ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = { + ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict), + ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict), + ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState) + }; + U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6); + assert((U32)dictMode < 3); + assert(mls - 3 < 4); + return getAllMatchesFns[(int)dictMode][mls - 3]; +} + +/************************* +* LDM helper functions * +*************************/ + +/* Struct containing info needed to make decision about ldm inclusion */ +typedef struct { + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ +} ZSTD_optLdm_t; + +/* ZSTD_optLdm_skipRawSeqStoreBytes(): + * Moves forward in @rawSeqStore by @nbBytes, + * which will update the fields 'pos' and 'posInSequence'. + */ +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) +{ + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +/* ZSTD_opt_getNextMatchAndUpdateSeqStore(): + * Calculates the beginning and end of the next match in the current block. + * Updates 'pos' and 'posInSequence' of the ldmSeqStore. + */ +static void +ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) +{ + rawSeq currSeq; + U32 currBlockEndPos; + U32 literalsBytesRemaining; + U32 matchBytesRemaining; + + /* Setting match end position to MAX to ensure we never use an LDM during this block */ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + return; + } + /* Calculate appropriate bytes left in matchLength and litLength + * after adjusting based on ldmSeqStore->posInSequence */ + currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; + assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); + currBlockEndPos = currPosInBlock + blockBytesRemaining; + literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? + currSeq.litLength - (U32)optLdm->seqStore.posInSequence : + 0; + matchBytesRemaining = (literalsBytesRemaining == 0) ? 
+ currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : + currSeq.matchLength; + + /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ + if (literalsBytesRemaining >= blockBytesRemaining) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); + return; + } + + /* Matches may be < MINMATCH by this process. In that case, we will reject them + when we are deciding whether or not to add the ldm */ + optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; + optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; + optLdm->offset = currSeq.offset; + + if (optLdm->endPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ + optLdm->endPosInBlock = currBlockEndPos; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); + } else { + /* Consume nb of bytes equal to size of sequence left */ + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); + } +} + +/* ZSTD_optLdm_maybeAddMatch(): + * Adds a match if it's long enough, + * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock', + * into 'matches'. Maintains the correct ordering of 'matches'. + */ +static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, + const ZSTD_optLdm_t* optLdm, U32 currPosInBlock) +{ + U32 const posDiff = currPosInBlock - optLdm->startPosInBlock; + /* Note: ZSTD_match_t actually contains offBase and matchLength (before subtracting MINMATCH) */ + U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; + + /* Ensure that current block position is not outside of the match */ + if (currPosInBlock < optLdm->startPosInBlock + || currPosInBlock >= optLdm->endPosInBlock + || candidateMatchLength < MINMATCH) { + return; + } + + if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + U32 const candidateOffBase = OFFSET_TO_OFFBASE(optLdm->offset); + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offBase: %u matchLength %u) at block position=%u", + candidateOffBase, candidateMatchLength, currPosInBlock); + matches[*nbMatches].len = candidateMatchLength; + matches[*nbMatches].off = candidateOffBase; + (*nbMatches)++; + } +} + +/* ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void +ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, + ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) +{ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. 
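
ZSTD_optLdm_skipRawSeqStoreBytes() advances a (pos, posInSequence) cursor across (litLength, matchLength) pairs until the requested byte count is consumed. A standalone sketch of that cursor arithmetic, with rawSeq_demo standing in for zstd's rawSeq:

    #include <stdio.h>

    typedef struct { unsigned litLength, matchLength; } rawSeq_demo;

    int main(void)
    {
        rawSeq_demo const seqs[3] = { {5, 20}, {3, 40}, {0, 16} };
        unsigned pos = 0, posInSequence = 0;
        unsigned toSkip = 30;    /* skip 30 bytes forward through the store */
        while (toSkip && pos < 3) {
            unsigned const seqLen = seqs[pos].litLength + seqs[pos].matchLength;
            unsigned const left = seqLen - posInSequence;
            if (toSkip >= left) { toSkip -= left; pos++; posInSequence = 0; }
            else { posInSequence += toSkip; toSkip = 0; }
        }
        /* 30 bytes = all of seq 0 (25 bytes) + 5 bytes into seq 1 */
        printf("pos=%u posInSequence=%u\n", pos, posInSequence);  /* pos=1 posInSequence=5 */
        return 0;
    }
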
As such, we need to correct for these "overshoots" + */ + U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + + +/*-******************************* +* Optimal parser +*********************************/ + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel) +#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel) +#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1)) + +FORCE_INLINE_TEMPLATE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode); + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; + + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastStretch; + ZSTD_optLdm_t optLdm; + + ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t)); + + optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; + optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; + ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); + assert(optLevel <= 2); + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; + + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch); + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(ip-istart), (U32)(iend-ip)); + if (!nbMatches) { + DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart)); + ip++; + continue; + } + + /* Match found: let's store this solution, and eventually find more candidates. + * During this forward pass, @opt is used to store stretches, + * defined as "a match followed by N literals". + * Note how this is different from a Sequence, which is "N literals followed by a match". + * Storing stretches allows us to store different match predecessors + * for each literal position part of a literals run. 
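
A toy parse makes the stretch/sequence distinction concrete. In this sketch (illustrative numbers only, no zstd API), a region parsed as [match 4][2 literals][match 5][1 literal] is stored forward as two stretches, then re-read as sequences, leaving the trailing literal as the block's "last literals":

    #include <stdio.h>

    /* a stretch = a match followed by litlen trailing literals */
    typedef struct { unsigned litlen, mlen; } stretch_demo;

    int main(void)
    {
        stretch_demo const stretches[2] = { {2, 4}, {1, 5} };
        unsigned i, pendingLits = 0;
        for (i = 0; i < 2; i++) {
            /* a sequence = the literals preceding a match + the match itself */
            printf("sequence: litlen=%u mlen=%u\n", pendingLits, stretches[i].mlen);
            pendingLits = stretches[i].litlen;  /* trailing literals feed the next sequence */
        }
        printf("last literals: %u\n", pendingLits);  /* returned by the block compressor, not stored */
        return 0;
    }

Storing stretches forward and flipping to sequences in the reverse traversal is what lets the parser keep a distinct best predecessor for every literal position.
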
*/ + + /* initialize opt[0] */ + opt[0].mlen = 0; /* there are only literals so far */ + opt[0].litlen = litlen; + /* No need to include the actual price of the literals before the first match + * because it is static for the duration of the forward pass, and is included + * in every subsequent price. But, we include the literal length because + * the cost variation of litlen depends on the value of litlen. + */ + opt[0].price = LL_PRICE(litlen); + ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0])); + ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep)); + + /* large match -> immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffBase = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffBase=%u at cPos=%u => start new series", + nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart)); + + if (maxML > sufficient_len) { + lastStretch.litlen = 0; + lastStretch.mlen = maxML; + lastStretch.off = maxOffBase; + DEBUGLOG(6, "large match (%u>%u) => immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = maxML; + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + assert(opt[0].price >= 0); + { U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; + opt[pos].mlen = 0; + opt[pos].litlen = litlen + pos; + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offBase = matches[matchNb].off; + U32 const end = matches[matchNb].len; + for ( ; pos <= end ; pos++ ) { + int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel); + int const sequencePrice = opt[0].price + matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offBase; + opt[pos].litlen = 0; /* end of match */ + opt[pos].price = sequencePrice + LL_PRICE(0); + } + } + last_pos = pos-1; + opt[pos].price = ZSTD_MAX_PRICE; + } + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur <= ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur); + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = opt[cur-1].litlen + 1; + int const price = opt[cur-1].price + + LIT_PRICE(ip+cur-1) + + LL_INCPRICE(litlen); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + ZSTD_optimal_t const prevMatch = opt[cur]; + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur] = opt[cur-1]; + opt[cur].litlen = litlen; + opt[cur].price = price; + if ( (optLevel >= 1) /* additional check only for higher modes */ + && (prevMatch.litlen == 0) /* replace a match */ + && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */ + && LIKELY(ip + cur < iend) + ) { + /* check next position, in case it would be cheaper */ + int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1); + int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1); + DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f", + cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals)); + if ( (with1literal < withMoreLiterals) + && (with1literal < opt[cur+1].price) ) { + /* update offset history - before it disappears */ + U32 const prev = cur - 
prevMatch.mlen; + repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0); + assert(cur >= prevMatch.mlen); + DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !", + ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals), + newReps.rep[0], newReps.rep[1], newReps.rep[2] ); + opt[cur+1] = prevMatch; /* mlen & offbase */ + ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t)); + opt[cur+1].litlen = 1; + opt[cur+1].price = with1literal; + if (last_pos < cur+1) last_pos = cur+1; + } + } + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)); + } + } + + /* Offset history is not updated during match comparison. + * Do it here, now that the match is selected and confirmed. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].litlen == 0) { + /* just finished a match => alter offset history */ + U32 const prev = cur - opt[cur].mlen; + repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + assert(opt[cur].price >= 0); + { U32 const ll0 = (opt[cur].litlen == 0); + int const previousPrice = opt[cur].price; + int const basePrice = previousPrice + LL_PRICE(0); + U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch); + U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const longestML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u", + inr-istart, cur, nbMatches, longestML); + + if ( (longestML > sufficient_len) + || (cur + longestML >= ZSTD_OPT_NUM) + || (ip + cur + longestML >= iend) ) { + lastStretch.mlen = longestML; + lastStretch.off = matches[nbMatches-1].off; + lastStretch.litlen = 0; + last_pos = cur + longestML; + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, opt[cur].litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { + /* fill empty positions, for future comparisons */ + last_pos++; + opt[last_pos].price = ZSTD_MAX_PRICE; + opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */ + } + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = 0; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + opt[last_pos+1].price = ZSTD_MAX_PRICE; + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastStretch = opt[last_pos]; + assert(cur >= lastStretch.mlen); + cur = last_pos - lastStretch.mlen; + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + assert(last_pos >= lastStretch.mlen); + assert(cur == last_pos - lastStretch.mlen); + + if (lastStretch.mlen==0) { + /* no solution : all matches have been converted into literals */ + assert(lastStretch.litlen == (ip - anchor) + last_pos); + ip += last_pos; + continue; + } + assert(lastStretch.off > 0); + + /* Update offset history */ + if (lastStretch.litlen == 0) { + /* finishing on a match : update offset history */ + repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t)); + assert(cur >= lastStretch.litlen); + cur -= lastStretch.litlen; + } + + /* Let's write the shortest path solution. + * It is stored in @opt in reverse order, + * starting from @storeEnd (==cur+2), + * effectively partially @opt overwriting. 
+         * Content is changed too:
+         * - So far, @opt stored stretches, aka a match followed by literals
+         * - Now, it will store sequences, aka literals followed by a match
+         */
+        {   U32 const storeEnd = cur + 2;
+            U32 storeStart = storeEnd;
+            U32 stretchPos = cur;
+
+            DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
+                        last_pos, cur); (void)last_pos;
+            assert(storeEnd < ZSTD_OPT_SIZE);
+            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
+            if (lastStretch.litlen > 0) {
+                /* last "sequence" is unfinished: just a bunch of literals */
+                opt[storeEnd].litlen = lastStretch.litlen;
+                opt[storeEnd].mlen = 0;
+                storeStart = storeEnd-1;
+                opt[storeStart] = lastStretch;
+            } else {
+                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
+                storeStart = storeEnd;
+            }
+            while (1) {
+                ZSTD_optimal_t nextStretch = opt[stretchPos];
+                opt[storeStart].litlen = nextStretch.litlen;
+                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
+                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
+                if (nextStretch.mlen == 0) {
+                    /* reaching beginning of segment */
+                    break;
+                }
+                storeStart--;
+                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
+                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
+                stretchPos -= nextStretch.litlen + nextStretch.mlen;
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore");
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offBase = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);  /* must be last sequence */
+                        ip = anchor + llen;  /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;  /* will finish */
+                    }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offBase, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offBase, mlen);
+                    anchor += advance;
+                    ip = anchor;
+            }   }
+            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
+
+            /* update all costs */
+            ZSTD_setBasePrices(optStatePtr, optLevel);
+        }
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+#endif /* build exclusions */
+
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+static size_t ZSTD_compressBlock_opt0(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
+}
+#endif
+
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+static size_t ZSTD_compressBlock_opt2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
+}
+#endif
+
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return
ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict); +} +#endif + + + + +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error out, its narrow contract must be respected. + */ +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history, only keep entropy stats */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const curr = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-passes strategy: + * this strategy makes a first pass over first block to collect statistics + * in order to seed next round's statistics with it. + * After 1st pass, function forgets history, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. 
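
The payoff of this 2-passes strategy can be sketched outside zstd entirely: a throwaway first pass counts symbols, and the real pass prices them with the collected statistics instead of flat defaults. Nothing below is zstd API; it is a minimal model of the seeding idea (compile with -lm).

    #include <stdio.h>
    #include <math.h>

    /* total cost in bits if each byte costs -log2(p(byte)) under freq/total */
    static double cost_bits(const unsigned freq[256], unsigned total,
                            const unsigned char* p, size_t n)
    {
        double bits = 0.0;
        size_t i;
        for (i = 0; i < n; i++)
            bits += log2((double)total / freq[p[i]]);
        return bits;
    }

    int main(void)
    {
        const unsigned char block[] = "abracadabra abracadabra";
        size_t const n = sizeof(block) - 1;
        unsigned freq[256];
        unsigned i;

        /* pass 1: collect statistics from the block itself; the parse is thrown away */
        for (i = 0; i < 256; i++) freq[i] = 1;   /* +1 keeps every symbol priceable */
        for (i = 0; i < n; i++) freq[block[i]]++;

        /* pass 2: the "real" pass now prices with seeded stats */
        printf("seeded cost : %.1f bits\n", cost_bits(freq, 256 + (unsigned)n, block, n));

        /* versus flat defaults, i.e. what a single pass would start from */
        for (i = 0; i < 256; i++) freq[i] = 1;
        printf("default cost: %.1f bits\n", cost_bits(freq, 256, block, n));
        return 0;
    }

The seeded estimate comes out well under the flat 8 bits/byte, which is exactly the margin btultra2 buys on the first block at the price of compressing it twice.
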
*/ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) /* input large enough to not employ default stats */ + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict); +} +#endif + +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict); +} +#endif + +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict); +} +#endif + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.h new file mode 100644 index 0000000..d4e7113 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstd_opt.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include "zstd_compress_internal.h" + +#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \ + || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR) +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); +#endif + +#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt +#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState +#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict +#else +#define ZSTD_COMPRESSBLOCK_BTOPT NULL +#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL +#endif + +#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra +#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState +#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict +#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2 +#else +#define ZSTD_COMPRESSBLOCK_BTULTRA NULL +#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL +#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL +#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL +#endif + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_OPT_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.c b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.c new file mode 100644 index 0000000..86ccce3 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.c @@ -0,0 +1,1882 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+
+/* ====== Compiler specifics ====== */
+#if defined(_MSC_VER)
+#  pragma warning(disable : 4204)   /* disable: C4204: non-constant aggregate initializer */
+#endif
+
+
+/* ====== Dependencies ====== */
+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/zstd_deps.h"    /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
+#include "../common/mem.h"          /* MEM_STATIC */
+#include "../common/pool.h"         /* threadpool */
+#include "../common/threading.h"    /* mutex */
+#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_ldm.h"
+#include "zstdmt_compress.h"
+
+/* Guards code to support resizing the SeqPool.
+ * We will want to resize the SeqPool to save memory in the future.
+ * Until then, comment the code out since it is unused.
+ */
+#define ZSTD_RESIZE_SEQPOOL 0
+
+/* ====== Debug ====== */
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
+    && !defined(_MSC_VER) \
+    && !defined(__MINGW32__)
+
+#  include <stdio.h>
+#  include <unistd.h>
+#  include <sys/times.h>
+
+#  define DEBUG_PRINTHEX(l,p,n)                                       \
+    do {                                                              \
+        unsigned debug_u;                                             \
+        for (debug_u=0; debug_u<(n); debug_u++)                       \
+            RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+        RAWLOG(l, " \n");                                             \
+    } while (0)
+
+static unsigned long long GetCurrentClockTimeMicroseconds(void)
+{
+   static clock_t _ticksPerSecond = 0;
+   if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
+
+   {   struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+       return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
+}  }
+
+#define MUTEX_WAIT_TIME_DLEVEL 6
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex)                                                  \
+    do {                                                                                \
+        if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {                                     \
+            unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds();    \
+            ZSTD_pthread_mutex_lock(mutex);                                             \
+            {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+                unsigned long long const elapsedTime = (afterTime-beforeTime);          \
+                if (elapsedTime > 1000) {                                               \
+                    /* or whatever threshold you like; I'm using 1 millisecond here */  \
+                    DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL,                                    \
+                        "Thread took %llu microseconds to acquire mutex %s \n",         \
+                        elapsedTime, #mutex);                                           \
+        }   }                                                                           \
+        } else {                                                                        \
+            ZSTD_pthread_mutex_lock(mutex);                                             \
+        }                                                                               \
+    } while (0)
+
+#else
+
+#  define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
+#  define DEBUG_PRINTHEX(l,p,n) do { } while (0)
+
+#endif
+
+
+/* ===== Buffer Pool ===== */
+/* a single Buffer Pool can be invoked from multiple threads in parallel */
+
+typedef struct buffer_s {
+    void* start;
+    size_t capacity;
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 };
+
+typedef struct ZSTDMT_bufferPool_s {
+    ZSTD_pthread_mutex_t poolMutex;
+    size_t bufferSize;
+    unsigned totalBuffers;
+    unsigned nbBuffers;
+    ZSTD_customMem cMem;
+    buffer_t* buffers;
+} ZSTDMT_bufferPool;
+
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    if (bufPool->buffers) {
+        unsigned u;
+        for (u=0; u<bufPool->totalBuffers; u++) {
+            DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+            ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+        }
+        ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+    ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
+{
+    ZSTDMT_bufferPool* const bufPool =
+        (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
+    if (bufPool==NULL) return NULL;
+    if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
+        ZSTD_customFree(bufPool, cMem);
+        return NULL;
+    }
+    bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+    if (bufPool->buffers==NULL) {
+        ZSTDMT_freeBufferPool(bufPool);
+        return NULL;
+    }
+    bufPool->bufferSize = 64 KB;
+    bufPool->totalBuffers = maxNbBuffers;
+    bufPool->nbBuffers = 0;
+    bufPool->cMem = cMem;
+    return bufPool;
+}
+
+/* only works at initialization, not during compression */
+static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const poolSize = sizeof(*bufPool);
+    size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
+    unsigned u;
+    size_t totalBufferSize = 0;
+    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+    for (u=0; u<bufPool->totalBuffers; u++)
+        totalBufferSize += bufPool->buffers[u].capacity;
+    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+
+    return poolSize + arraySize + totalBufferSize;
+}
+
+/* ZSTDMT_setBufferSize() :
+ * all future buffers provided by this buffer pool will have _at least_ this size
+ * note : it's better for all buffers to have same size,
+ * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */
+static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize)
+{
+    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+    DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize);
+    bufPool->bufferSize = bSize;
+    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+}
+
+
+static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, unsigned maxNbBuffers)
+{
+    if (srcBufPool==NULL) return NULL;
+    if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
+        return srcBufPool;
+    /* need a larger buffer pool */
+    {   ZSTD_customMem const cMem = srcBufPool->cMem;
+        size_t const bSize = srcBufPool->bufferSize;   /* forward parameters */
+        ZSTDMT_bufferPool* newBufPool;
+        ZSTDMT_freeBufferPool(srcBufPool);
+        newBufPool = ZSTDMT_createBufferPool(maxNbBuffers, cMem);
+        if (newBufPool==NULL) return newBufPool;
+        ZSTDMT_setBufferSize(newBufPool, bSize);
+        return newBufPool;
+    }
+}
+
+/** ZSTDMT_getBuffer() :
+ *  assumption : bufPool must be valid
+ * @return : a buffer, with start pointer and size
+ *  note: allocation may fail, in this case, start==NULL and size==0 */
+static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
+{
+    size_t const bSize = bufPool->bufferSize;
+    DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
+    ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+    if (bufPool->nbBuffers) {   /* try to use an existing buffer */
+        buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
+        size_t const availBufferSize = buf.capacity;
+        bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
+        if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
+            /* large enough, but not too much */
+            DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
+                        bufPool->nbBuffers, (U32)buf.capacity);
+            ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+            return buf;
+        }
+        /* size conditions not respected : scratch this buffer, create new one */
+        DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing");
+        ZSTD_customFree(buf.start, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+    /* create new buffer */
+    DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer");
+    {
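+        /* No pooled buffer met the size conditions :
+         * allocate a fresh one of the pool's current bufferSize.
+         * note : malloc may fail, the caller must check buffer.start != NULL */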
buffer_t buffer; + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.capacity = (start==NULL) ? 0 : bSize; + if (start==NULL) { + DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!"); + } else { + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); + } + return buffer; + } +} + +#if ZSTD_RESIZE_SEQPOOL +/** ZSTDMT_resizeBuffer() : + * assumption : bufPool must be valid + * @return : a buffer that is at least the buffer pool buffer size. + * If a reallocation happens, the data in the input buffer is copied. + */ +static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) +{ + size_t const bSize = bufPool->bufferSize; + if (buffer.capacity < bSize) { + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer_t newBuffer; + newBuffer.start = start; + newBuffer.capacity = start == NULL ? 0 : bSize; + if (start != NULL) { + assert(newBuffer.capacity >= buffer.capacity); + ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity); + DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize); + return newBuffer; + } + DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!"); + } + return buffer; +} +#endif + +/* store buffer for later re-use, up to pool capacity */ +static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf) +{ + DEBUGLOG(5, "ZSTDMT_releaseBuffer"); + if (buf.start == NULL) return; /* compatible with release on NULL */ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers < bufPool->totalBuffers) { + bufPool->buffers[bufPool->nbBuffers++] = buf; /* stored for later use */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u", + (U32)buf.capacity, (U32)(bufPool->nbBuffers-1)); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return; + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* Reached bufferPool capacity (note: should not happen) */ + DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing "); + ZSTD_customFree(buf.start, bufPool->cMem); +} + +/* We need 2 output buffers per worker since each dstBuff must be flushed after it is released. + * The 3 additional buffers are as follows: + * 1 buffer for input loading + * 1 buffer for "next input" when submitting current one + * 1 buffer stuck in queue */ +#define BUF_POOL_MAX_NB_BUFFERS(nbWorkers) (2*(nbWorkers) + 3) + +/* After a worker releases its rawSeqStore, it is immediately ready for reuse. + * So we only need one seq buffer per worker. 
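+ * For example (illustration only) : with nbWorkers==4,
+ * BUF_POOL_MAX_NB_BUFFERS(4) == 2*4+3 == 11 dst/input buffers,
+ * while SEQ_POOL_MAX_NB_BUFFERS(4) == 4 sequence buffers.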
+ */
+#define SEQ_POOL_MAX_NB_BUFFERS(nbWorkers) (nbWorkers)
+
+/* =====   Seq Pool Wrapper   ====== */
+
+typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
+
+static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
+{
+    return ZSTDMT_sizeof_bufferPool(seqPool);
+}
+
+static rawSeqStore_t bufferToSeq(buffer_t buffer)
+{
+    rawSeqStore_t seq = kNullRawSeqStore;
+    seq.seq = (rawSeq*)buffer.start;
+    seq.capacity = buffer.capacity / sizeof(rawSeq);
+    return seq;
+}
+
+static buffer_t seqToBuffer(rawSeqStore_t seq)
+{
+    buffer_t buffer;
+    buffer.start = seq.seq;
+    buffer.capacity = seq.capacity * sizeof(rawSeq);
+    return buffer;
+}
+
+static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
+{
+    if (seqPool->bufferSize == 0) {
+        return kNullRawSeqStore;
+    }
+    return bufferToSeq(ZSTDMT_getBuffer(seqPool));
+}
+
+#if ZSTD_RESIZE_SEQPOOL
+static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+    return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
+}
+#endif
+
+static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+    ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
+}
+
+static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
+{
+    ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
+}
+
+static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+    ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(SEQ_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
+    if (seqPool == NULL) return NULL;
+    ZSTDMT_setNbSeq(seqPool, 0);
+    return seqPool;
+}
+
+static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
+{
+    ZSTDMT_freeBufferPool(seqPool);
+}
+
+static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
+{
+    return ZSTDMT_expandBufferPool(pool, SEQ_POOL_MAX_NB_BUFFERS(nbWorkers));
+}
+
+
+/* =====   CCtx Pool   ===== */
+/* a single CCtx Pool can be invoked from multiple threads in parallel */
+
+typedef struct {
+    ZSTD_pthread_mutex_t poolMutex;
+    int totalCCtx;
+    int availCCtx;
+    ZSTD_customMem cMem;
+    ZSTD_CCtx** cctxs;
+} ZSTDMT_CCtxPool;
+
+/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
+static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
+{
+    if (!pool) return;
+    ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+    if (pool->cctxs) {
+        int cid;
+        for (cid=0; cid<pool->totalCCtx; cid++)
+            ZSTD_freeCCtx(pool->cctxs[cid]);   /* free compatible with NULL */
+        ZSTD_customFree(pool->cctxs, pool->cMem);
+    }
+    ZSTD_customFree(pool, pool->cMem);
+}
+
+/* ZSTDMT_createCCtxPool() :
+ * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
+                                              ZSTD_customMem cMem)
+{
+    ZSTDMT_CCtxPool* const cctxPool =
+        (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
+    assert(nbWorkers > 0);
+    if (!cctxPool) return NULL;
+    if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
+        ZSTD_customFree(cctxPool, cMem);
+        return NULL;
+    }
+    cctxPool->totalCCtx = nbWorkers;
+    cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+    if (!cctxPool->cctxs) {
+        ZSTDMT_freeCCtxPool(cctxPool);
+        return NULL;
+    }
+    cctxPool->cMem = cMem;
+    cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
+    cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
+    DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
+    return cctxPool;
+}
+
+static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
+                                              int nbWorkers)
+{
+    if (srcPool==NULL) return NULL;
+    if (nbWorkers <= srcPool->totalCCtx) return srcPool;   /* good enough */
+    /* need a larger cctx pool */
+    {   ZSTD_customMem const cMem = srcPool->cMem;
+        ZSTDMT_freeCCtxPool(srcPool);
+        return ZSTDMT_createCCtxPool(nbWorkers, cMem);
+    }
+}
+
+/* only works during initialization phase, not during compression */
+static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
+{
+    ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+    {   unsigned const nbWorkers = cctxPool->totalCCtx;
+        size_t const poolSize = sizeof(*cctxPool);
+        size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
+        size_t totalCCtxSize = 0;
+        unsigned u;
+        for (u=0; u<nbWorkers; u++) {
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
+        }
+        ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+        assert(nbWorkers > 0);
+        return poolSize + arraySize + totalCCtxSize;
+    }
+}
+
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
+{
+    DEBUGLOG(5, "ZSTDMT_getCCtx");
+    ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+    if (cctxPool->availCCtx) {
+        cctxPool->availCCtx--;
+        {   ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
+            ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+            return cctx;
+    }   }
+    ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+    DEBUGLOG(5, "create one more CCtx");
+    return ZSTD_createCCtx_advanced(cctxPool->cMem);   /* note : can be NULL, when creation fails ! */
+}
+
+static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return;   /* compatibility with release on NULL */
+    ZSTD_pthread_mutex_lock(&pool->poolMutex);
+    if (pool->availCCtx < pool->totalCCtx)
+        pool->cctxs[pool->availCCtx++] = cctx;
+    else {
+        /* pool overflow : should not happen, since totalCCtx==nbWorkers */
+        DEBUGLOG(4, "CCtx pool overflow : free cctx");
+        ZSTD_freeCCtx(cctx);
+    }
+    ZSTD_pthread_mutex_unlock(&pool->poolMutex);
+}
+
+/* ====   Serial State   ==== */
+
+typedef struct {
+    void const* start;
+    size_t size;
+} range_t;
+
+typedef struct {
+    /* All variables in the struct are protected by mutex. */
+    ZSTD_pthread_mutex_t mutex;
+    ZSTD_pthread_cond_t cond;
+    ZSTD_CCtx_params params;
+    ldmState_t ldmState;
+    XXH64_state_t xxhState;
+    unsigned nextJobID;
+    /* Protects ldmWindow.
+     * Must be acquired after the main mutex when acquiring both.
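+     * Illustrative nesting when both are needed (as done in
+     * ZSTDMT_serialState_update()):
+     *   ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);           -- main mutex first
+     *   ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);  -- then this one
+     *   ... publish ldmWindow, signal ldmWindowCond ...
+     *   then unlock in reverse order.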
+     */
+    ZSTD_pthread_mutex_t ldmWindowMutex;
+    ZSTD_pthread_cond_t ldmWindowCond;  /* Signaled when ldmWindow is updated */
+    ZSTD_window_t ldmWindow;  /* A thread-safe copy of ldmState.window */
+} serialState_t;
+
+static int
+ZSTDMT_serialState_reset(serialState_t* serialState,
+                         ZSTDMT_seqPool* seqPool,
+                         ZSTD_CCtx_params params,
+                         size_t jobSize,
+                         const void* dict, size_t const dictSize,
+                         ZSTD_dictContentType_e dictContentType)
+{
+    /* Adjust parameters */
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
+        DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
+        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+        assert(params.ldmParams.hashRateLog < 32);
+    } else {
+        ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+    }
+    serialState->nextJobID = 0;
+    if (params.fParams.checksumFlag)
+        XXH64_reset(&serialState->xxhState, 0);
+    if (params.ldmParams.enableLdm == ZSTD_ps_enable) {
+        ZSTD_customMem cMem = params.customMem;
+        unsigned const hashLog = params.ldmParams.hashLog;
+        size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
+        unsigned const bucketLog =
+            params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
+        unsigned const prevBucketLog =
+            serialState->params.ldmParams.hashLog -
+            serialState->params.ldmParams.bucketSizeLog;
+        size_t const numBuckets = (size_t)1 << bucketLog;
+        /* Size the seq pool tables */
+        ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
+        /* Reset the window */
+        ZSTD_window_init(&serialState->ldmState.window);
+        /* Resize tables and output space if necessary. */
+        if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
+            ZSTD_customFree(serialState->ldmState.hashTable, cMem);
+            serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem);
+        }
+        if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
+            ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
+            serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
+        }
+        if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
+            return 1;
+        /* Zero the tables */
+        ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
+        ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
+
+        /* Update window state and fill hash table with dict */
+        serialState->ldmState.loadedDictEnd = 0;
+        if (dictSize > 0) {
+            if (dictContentType == ZSTD_dct_rawContent) {
+                BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
+                ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
+                ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
+                serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
+            } else {
+                /* don't even load anything */
+            }
+        }
+
+        /* Initialize serialState's copy of ldmWindow.
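+         * Workers republish this copy after each serial step (see
+         * ZSTDMT_serialState_update()), always under ldmWindowMutex.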
*/ + serialState->ldmWindow = serialState->ldmState.window; + } + + serialState->params = params; + serialState->params.jobSize = (U32)jobSize; + return 0; +} + +static int ZSTDMT_serialState_init(serialState_t* serialState) +{ + int initError = 0; + ZSTD_memset(serialState, 0, sizeof(*serialState)); + initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); + initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL); + return initError; +} + +static void ZSTDMT_serialState_free(serialState_t* serialState) +{ + ZSTD_customMem cMem = serialState->params.customMem; + ZSTD_pthread_mutex_destroy(&serialState->mutex); + ZSTD_pthread_cond_destroy(&serialState->cond); + ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); + ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); +} + +static void ZSTDMT_serialState_update(serialState_t* serialState, + ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore, + range_t src, unsigned jobID) +{ + /* Wait for our turn */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + DEBUGLOG(5, "wait for serialState->cond"); + ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm == ZSTD_ps_enable) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + assert(src.size <= serialState->params.jobSize); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + &serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. 
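+     * The same publish pattern (lock ldmWindowMutex, update the copy,
+     * then signal ldmWindowCond) is used by ZSTDMT_serialState_ensureFinished()
+     * below, when an errored job forces the window to be cleared.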
+ */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next jobs turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable); + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by mtctx */ +} ZSTDMT_jobDescription; + +#define JOB_ERROR(e) \ + do { \ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \ + job->cSize = e; \ + ZSTD_pthread_mutex_unlock(&job->job_mutex); \ + goto _endJob; \ + } while (0) + +/* ZSTDMT_compressionJob() is a POOL_function type */ +static void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + ZSTD_CCtx_params jobParams = 
job->params; /* do not modify job->params ! copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + size_t lastCBlockSize = 0; + + /* resources */ + if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation)); + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + if (jobParams.ldmParams.enableLdm == ZSTD_ps_enable && rawSeqStore.seq == NULL) + JOB_ERROR(ERROR(memory_allocation)); + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = ZSTD_ps_disable; + /* Correct nbWorkers to 0. */ + jobParams.nbWorkers = 0; + + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); + } + if (!job->firstJob) { + size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0); + if (ZSTD_isError(err)) JOB_ERROR(err); + } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + ZSTD_dtlm_fast, + NULL, /*cdict*/ + &jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue_public(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) JOB_ERROR(hSize); + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue_public(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + ip += chunkSize; + op += cSize; assert(op < 
oend);
+                /* stats */
+                ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+                job->cSize += cSize;
+                job->consumed = chunkSize * chunkNb;
+                DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)",
+                            (U32)cSize, (U32)job->cSize);
+                ZSTD_pthread_cond_signal(&job->job_cond);   /* warns some more data is ready to be flushed */
+                ZSTD_pthread_mutex_unlock(&job->job_mutex);
+            }
+            /* last block */
+            assert(chunkSize > 0);
+            assert((chunkSize & (chunkSize - 1)) == 0);  /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
+            if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
+                size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
+                size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
+                size_t const cSize = (job->lastJob) ?
+                     ZSTD_compressEnd_public(cctx, op, oend-op, ip, lastBlockSize) :
+                     ZSTD_compressContinue_public(cctx, op, oend-op, ip, lastBlockSize);
+                if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
+                lastCBlockSize = cSize;
+        }   }
+        if (!job->firstJob) {
+            /* Double check that we don't have an ext-dict, because then our
+             * repcode invalidation doesn't work.
+             */
+            assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
+        }
+        ZSTD_CCtx_trace(cctx, 0);
+
+_endJob:
+    ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize);
+    if (job->prefix.size > 0)
+        DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start);
+    DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start);
+    /* release resources */
+    ZSTDMT_releaseSeq(job->seqPool, rawSeqStore);
+    ZSTDMT_releaseCCtx(job->cctxPool, cctx);
+    /* report */
+    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
+    if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0);
+    job->cSize += lastCBlockSize;
+    job->consumed = job->src.size;  /* when job->consumed == job->src.size , compression job is presumed completed */
+    ZSTD_pthread_cond_signal(&job->job_cond);
+    ZSTD_pthread_mutex_unlock(&job->job_mutex);
+}
+
+
+/* ------------------------------------------ */
+/* =====   Multi-threaded compression   ===== */
+/* ------------------------------------------ */
+
+typedef struct {
+    range_t prefix;         /* read-only non-owned prefix buffer */
+    buffer_t buffer;
+    size_t filled;
+} inBuff_t;
+
+typedef struct {
+  BYTE* buffer;     /* The round input buffer. All jobs get references
+                     * to pieces of the buffer. ZSTDMT_tryGetInputRange()
+                     * handles handing out job input buffers, and makes
+                     * sure it doesn't overlap with any pieces still in use.
+                     */
+  size_t capacity;  /* The capacity of buffer. */
+  size_t pos;       /* The position of the current inBuff in the round
+                     * buffer. Updated past the end of the inBuff once
+                     * the inBuff is sent to the worker thread.
+                     * pos <= capacity.
+                     */
+} roundBuff_t;
+
+static const roundBuff_t kNullRoundBuff = {NULL, 0, 0};
+
+#define RSYNC_LENGTH 32
+/* Don't create chunks smaller than the zstd block size.
+ * This stops us from regressing compression ratio too much,
+ * and ensures our output fits in ZSTD_compressBound().
+ *
+ * If this is shrunk < ZSTD_BLOCKSIZELOG_MIN then
+ * ZSTD_COMPRESSBOUND() will need to be updated.
+ */
+#define RSYNC_MIN_BLOCK_LOG ZSTD_BLOCKSIZELOG_MAX
+#define RSYNC_MIN_BLOCK_SIZE (1<<RSYNC_MIN_BLOCK_LOG)
+
+typedef struct {
+  U64 hash;
+  U64 hitMask;
+  U64 primePower;
+} rsyncState_t;
+
+struct ZSTDMT_CCtx_s {
+    POOL_ctx* factory;
+    ZSTDMT_jobDescription* jobs;
+    ZSTDMT_bufferPool* bufPool;
+    ZSTDMT_CCtxPool* cctxPool;
+    ZSTDMT_seqPool* seqPool;
+    ZSTD_CCtx_params params;
+    size_t targetSectionSize;
+    size_t targetPrefixSize;
+    int jobReady;        /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
+    inBuff_t inBuff;
+    roundBuff_t roundBuff;
+    serialState_t serial;
+    rsyncState_t rsync;
+    unsigned jobIDMask;
+    unsigned doneJobID;
+    unsigned nextJobID;
+    unsigned frameEnded;
+    unsigned allJobsCompleted;
+    unsigned long long frameContentSize;
+    unsigned long long consumed;
+    unsigned long long produced;
+    ZSTD_customMem cMem;
+    ZSTD_CDict* cdictLocal;
+    const ZSTD_CDict* cdict;
+    unsigned providedFactory: 1;
+};
+
+static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
+{
+    U32 jobNb;
+    if (jobTable == NULL) return;
+    for (jobNb=0; jobNb<nbJobs; jobNb++) {
+        ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
+        ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
+    }
+    ZSTD_customFree(jobTable, cMem);
+}
+
+/* ZSTDMT_createJobsTable()
+ * allocate and init a job table.
+ * update *nbJobsPtr to next power of 2 value, as size of table */
+static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr,
+                                                     ZSTD_customMem cMem)
+{
+    U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
+    U32 const nbJobs = 1 << nbJobsLog2;
+    U32 jobNb;
+    ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
+                ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+    int initError = 0;
+    if (jobTable==NULL) return NULL;
+    *nbJobsPtr = nbJobs;
+    for (jobNb=0; jobNb<nbJobs; jobNb++) {
+        initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
+        initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
+    }
+    if (initError != 0) {
+        ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
+        return NULL;
+    }
+    return jobTable;
+}
+
+static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
+    U32 nbJobs = nbWorkers + 2;
+    if (nbJobs > mtctx->jobIDMask+1) {   /* need more job capacity */
+        ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
+        mtctx->jobIDMask = 0;
+        mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+        if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+        assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0));   /* ensure nbJobs is a power of 2 */
+        mtctx->jobIDMask = nbJobs - 1;
+    }
+    return 0;
+}
+
+
+/* ZSTDMT_CCtxParam_setNbWorkers():
+ * Internal use only */
+static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
+{
+    return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
+}
+
+MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
+{
+    ZSTDMT_CCtx* mtctx;
+    U32 nbJobs = nbWorkers + 2;
+    int initError;
+    DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
+
+    if (nbWorkers < 1) return NULL;
+    nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
+    if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
+        /* invalid custom allocator */
+        return NULL;
+
+    mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem);
+    if (!mtctx) return NULL;
+    ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
+    mtctx->cMem = cMem;
+    mtctx->allJobsCompleted = 1;
+    if (pool != NULL) {
+        mtctx->factory = pool;
+        mtctx->providedFactory = 1;
+    }
+    else {
+        mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+        mtctx->providedFactory = 0;
+    }
+    mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
+    assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0);   /* ensure nbJobs is a power of 2 */
+    mtctx->jobIDMask = nbJobs - 1;
+    mtctx->bufPool = ZSTDMT_createBufferPool(BUF_POOL_MAX_NB_BUFFERS(nbWorkers), cMem);
+    mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
+    mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
+    initError = ZSTDMT_serialState_init(&mtctx->serial);
+    mtctx->roundBuff = kNullRoundBuff;
+    if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) {
+        ZSTDMT_freeCCtx(mtctx);
+        return NULL;
+    }
+    DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
+    return mtctx;
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
+{
+#ifdef ZSTD_MULTITHREAD
+    return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
+#else
+    (void)nbWorkers;
+    (void)cMem;
+    (void)pool;
+    return NULL;
+#endif
+}
+
+
+/* ZSTDMT_releaseAllJobResources() :
+ * note : ensure all workers are killed first !
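+ * For reference, ZSTDMT_freeCCtx() below follows this order:
+ *   POOL_free(mtctx->factory);            -- join/stop worker threads first
+ *   ZSTDMT_releaseAllJobResources(mtctx); -- then reclaim per-job buffers
+ *   ... then free the jobs table, the buffer/cctx/seq pools, the serial
+ *   state, and mtctx itself.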
*/ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + /* Copy the mutex/cond out */ + ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex; + ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond; + + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + + /* Clear the job description, but keep the mutex/cond */ + ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); + mtctx->jobs[jobID].job_mutex = mutex; + mtctx->jobs[jobID].job_cond = cond; + } + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + if (!mtctx->providedFactory) + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_customFree(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + + +/* ZSTDMT_resize() : + * @return : error code if fails, 0 on success */ +static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) +{ + if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , ""); + mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, BUF_POOL_MAX_NB_BUFFERS(nbWorkers)); + if (mtctx->bufPool == NULL) return ERROR(memory_allocation); + mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); + if (mtctx->cctxPool == NULL) return ERROR(memory_allocation); + mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers); + if (mtctx->seqPool == NULL) return ERROR(memory_allocation); + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + return 0; +} + + +/*! 
ZSTDMT_updateCParams_whileCompressing() : + * Updates a selected set of compression parameters, remaining compatible with currently active frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection inside workers. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(5, "ZSTDMT_getFrameProgression"); + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + fps.consumed = mtctx->consumed; + fps.produced = fps.flushed = mtctx->produced; + fps.currentJobID = mtctx->nextJobID; + fps.nbActiveWorkers = 0; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady); + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + fps.ingested += jobPtr->src.size; + fps.consumed += jobPtr->consumed; + fps.produced += produced; + fps.flushed += flushed; + fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) +{ + size_t toFlush; + unsigned const jobID = mtctx->doneJobID; + assert(jobID <= mtctx->nextJobID); + if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */ + + /* look into oldest non-fully-flushed job */ + { unsigned const wJobID = jobID & mtctx->jobIDMask; + ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + assert(jobPtr->consumed <= jobPtr->src.size); + toFlush = produced - flushed; + /* if toFlush==0, nothing is available to flush. + * However, jobID is expected to still be active: + * if jobID was already completed and fully flushed, + * ZSTDMT_flushProduced() should have already moved onto next job. + * Therefore, some input has not yet been consumed. 
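+         * The assert below encodes exactly this invariant :
+         * whenever toFlush==0 here, the job must still have unconsumed input.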
*/ + if (toFlush==0) { + assert(jobPtr->consumed < jobPtr->src.size); + } + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + + return toFlush; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) +{ + unsigned jobLog; + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on cycleLog instead. */ + jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3); + } else { + jobLog = MAX(20, params->cParams.windowLog + 2); + } + return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); +} + +static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) +{ + switch(strat) + { + case ZSTD_btultra2: + return 9; + case ZSTD_btultra: + case ZSTD_btopt: + return 8; + case ZSTD_btlazy2: + case ZSTD_lazy2: + return 7; + case ZSTD_lazy: + case ZSTD_greedy: + case ZSTD_dfast: + case ZSTD_fast: + default:; + } + return 6; +} + +static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) +{ + assert(0 <= ovlog && ovlog <= 9); + if (ovlog == 0) return ZSTDMT_overlapLog_default(strat); + return ovlog; +} + +static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) +{ + int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); + assert(0 <= overlapRLog && overlapRLog <= 8); + if (params->ldmParams.enableLdm == ZSTD_ps_enable) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on chainLog instead. + * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ + ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + - overlapRLog; + } + assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); + DEBUGLOG(4, "overlapLog : %i", params->overlapLog); + DEBUGLOG(4, "overlap size : %i", 1 << ovLog); + return (ovLog==0) ? 
0 : (size_t)1 << ovLog;
+}
+
+/* ====================================== */
+/* =======      Streaming API     ======= */
+/* ====================================== */
+
+size_t ZSTDMT_initCStream_internal(
+        ZSTDMT_CCtx* mtctx,
+        const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+        const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
+        unsigned long long pledgedSrcSize)
+{
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
+                (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+
+    /* params are supposed to be fully validated at this point */
+    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    assert(!((dict) && (cdict)));  /* either dict or cdict, not both */
+
+    /* init */
+    if (params.nbWorkers != mtctx->params.nbWorkers)
+        FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
+
+    if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
+    if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
+
+    DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
+
+    if (mtctx->allJobsCompleted == 0) {   /* previous compression not correctly finished */
+        ZSTDMT_waitForAllJobsCompleted(mtctx);
+        ZSTDMT_releaseAllJobResources(mtctx);
+        mtctx->allJobsCompleted = 1;
+    }
+
+    mtctx->params = params;
+    mtctx->frameContentSize = pledgedSrcSize;
+    if (dict) {
+        ZSTD_freeCDict(mtctx->cdictLocal);
+        mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+                                                      ZSTD_dlm_byCopy, dictContentType,   /* note : a loadPrefix becomes an internal CDict */
+                                                      params.cParams, mtctx->cMem);
+        mtctx->cdict = mtctx->cdictLocal;
+        if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
+    } else {
+        ZSTD_freeCDict(mtctx->cdictLocal);
+        mtctx->cdictLocal = NULL;
+        mtctx->cdict = cdict;
+    }
+
+    mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
+    DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
+    mtctx->targetSectionSize = params.jobSize;
+    if (mtctx->targetSectionSize == 0) {
+        mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
+    }
+    assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
+    if (params.rsyncable) {
+        /* Aim for the targetSectionSize as the average job size. */
+        U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+        U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+        /* We refuse to create jobs < RSYNC_MIN_BLOCK_SIZE bytes, so make sure our
+         * expected job size is at least 4x larger. */
+        assert(rsyncBits >= RSYNC_MIN_BLOCK_LOG + 2);
+        DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
+        mtctx->rsync.hash = 0;
+        mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
+        mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
+    }
+    if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize;   /* job size must be >= overlap size */
+    DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
+    DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
+    ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
+    {
+        /* If ldm is enabled we need windowSize space. */
+        size_t const windowSize = mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable ?
(1U << mtctx->params.cParams.windowLog) : 0; + /* Two buffers of slack, plus extra space for the overlap + * This is the minimum slack that LDM works with. One extra because + * flush might waste up to targetSectionSize-1 bytes. Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. + */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, + dict, dictSize, dictContentType)) + return ERROR(memory_allocation); + return 0; +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled. 
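+ * ("expected conditions" here means the asserts below : lastJob==1,
+ *  src.size==0, firstJob==0, and no caller-provided dstBuff; the only
+ *  remaining failure path is the internal buffer allocation, which is
+ *  reported through job->cSize rather than a return value.)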
+ */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = mtctx->nextJobID==0 ? 
mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0); + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { + mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * flush whatever data has been produced but not yet flushed in current job. + * move to next job if current one is fully flushed. + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . 
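+ * note : when blocking, the wait is on the job's job_cond condition,
+ *        which the worker signals after each compressed chunk and at job completion.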
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + if (toFlush > 0) { + ZSTD_memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + } + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job is completed */ + && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job 
%u completed (%u bytes), moving to next one", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + DEBUGLOG(5, "dstBuffer released"); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. + */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. 
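
The overlap test that follows reduces to the classic half-open interval check, with the same empty-range guard the real code applies. A standalone sketch:

    #include <assert.h>
    #include <stddef.h>

    /* Half-open intervals [aStart, aEnd) and [bStart, bEnd) overlap iff each
     * starts before the other ends; empty intervals never overlap. */
    static int rangesOverlap(size_t aStart, size_t aEnd, size_t bStart, size_t bEnd)
    {
        if (aStart == aEnd || bStart == bEnd) return 0;  /* empty */
        return aStart < bEnd && bStart < aEnd;
    }

    int main(void)
    {
        assert( rangesOverlap(0, 10, 5, 15));
        assert(!rangesOverlap(0, 10, 10, 20));  /* touching is not overlapping */
        assert(!rangesOverlap(0, 0, 0, 10));    /* empty range */
        return 0;
    }
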
+ */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const rangeStart = (BYTE const*)range.start; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + + { + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeEnd = rangeStart + range.size; + + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; + } +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + DEBUGLOG(5, "ZSTDMT_doesOverlapWindow"); + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm == ZSTD_ps_enable) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "ZSTDMT_waitForLdmComplete"); + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(5, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. + */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + DEBUGLOG(5, "ZSTDMT_tryGetInputRange"); + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. 
+ */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + +typedef struct { + size_t toLoad; /* The number of bytes to load from the input. */ + int flush; /* Boolean declaring if we must flush because we found a synchronization point. */ +} syncPoint_t; + +/** + * Searches through the input for a synchronization point. If one is found, we + * will instruct the caller to flush, and return the number of bytes to load. + * Otherwise, we will load as many bytes as possible and instruct the caller + * to continue as normal. + */ +static syncPoint_t +findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input) +{ + BYTE const* const istart = (BYTE const*)input.src + input.pos; + U64 const primePower = mtctx->rsync.primePower; + U64 const hitMask = mtctx->rsync.hitMask; + + syncPoint_t syncPoint; + U64 hash; + BYTE const* prev; + size_t pos; + + syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + syncPoint.flush = 0; + if (!mtctx->params.rsyncable) + /* Rsync is disabled. */ + return syncPoint; + if (mtctx->inBuff.filled + input.size - input.pos < RSYNC_MIN_BLOCK_SIZE) + /* We don't emit synchronization points if it would produce too small blocks. + * We don't have enough input to find a synchronization point, so don't look. + */ + return syncPoint; + if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH) + /* Not enough to compute the hash. + * We will miss any synchronization points in this RSYNC_LENGTH byte + * window. However, since it depends only in the internal buffers, if the + * state is already synchronized, we will remain synchronized. + * Additionally, the probability that we miss a synchronization point is + * low: RSYNC_LENGTH / targetSectionSize. + */ + return syncPoint; + /* Initialize the loop variables. */ + if (mtctx->inBuff.filled < RSYNC_MIN_BLOCK_SIZE) { + /* We don't need to scan the first RSYNC_MIN_BLOCK_SIZE positions + * because they can't possibly be a sync point. So we can start + * part way through the input buffer. 
+ */ + pos = RSYNC_MIN_BLOCK_SIZE - mtctx->inBuff.filled; + if (pos >= RSYNC_LENGTH) { + prev = istart + pos - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + } else { + assert(mtctx->inBuff.filled >= RSYNC_LENGTH); + prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev + pos, (RSYNC_LENGTH - pos)); + hash = ZSTD_rollingHash_append(hash, istart, pos); + } + } else { + /* We have enough bytes buffered to initialize the hash, + * and have processed enough bytes to find a sync point. + * Start scanning at the beginning of the input. + */ + assert(mtctx->inBuff.filled >= RSYNC_MIN_BLOCK_SIZE); + assert(RSYNC_MIN_BLOCK_SIZE >= RSYNC_LENGTH); + pos = 0; + prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + if ((hash & hitMask) == hitMask) { + /* We're already at a sync point so don't load any more until + * we're able to flush this sync point. + * This likely happened because the job table was full so we + * couldn't add our job. + */ + syncPoint.toLoad = 0; + syncPoint.flush = 1; + return syncPoint; + } + } + /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll + * through the input. If we hit a synchronization point, then cut the + * job off, and tell the compressor to flush the job. Otherwise, load + * all the bytes and continue as normal. + * If we go too long without a synchronization point (targetSectionSize) + * then a block will be emitted anyways, but this is okay, since if we + * are already synchronized we will remain synchronized. + */ + assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); + for (; pos < syncPoint.toLoad; ++pos) { + BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH]; + /* This assert is very expensive, and Debian compiles with asserts enabled. + * So disable it for now. We can get similar coverage by checking it at the + * beginning & end of the loop. + * assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); + */ + hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower); + assert(mtctx->inBuff.filled + pos >= RSYNC_MIN_BLOCK_SIZE); + if ((hash & hitMask) == hitMask) { + syncPoint.toLoad = pos + 1; + syncPoint.flush = 1; + ++pos; /* for assert */ + break; + } + } + assert(pos < RSYNC_LENGTH || ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); + return syncPoint; +} + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx) +{ + size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled; + if (hintInSize==0) hintInSize = mtctx->targetSectionSize; + return hintInSize; +} + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { + /* current frame being ended. 
Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. + */ + DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed"); + assert(mtctx->doneJobID != mtctx->nextJobID); + } else + DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start); + } + if (mtctx->inBuff.buffer.start != NULL) { + syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input); + if (syncPoint.flush && endOp == ZSTD_e_continue) { + endOp = ZSTD_e_flush; + } + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); + input->pos += syncPoint.toLoad; + mtctx->inBuff.filled += syncPoint.toLoad; + forwardInputProgress = syncPoint.toLoad>0; + } + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) { + /* Can't end yet because the input is not fully consumed. + * We are in one of these cases: + * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job. + * - We filled the input buffer: flush this job but don't end the frame. + * - We hit a synchronization point: flush this job but don't end the frame. + */ + assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable); + endOp = ZSTD_e_flush; + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , ""); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush); + return remainingToFlush; + } +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.h b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.h new file mode 100644 index 0000000..ed4dc0e --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/compress/zstdmt_compress.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
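
ZSTDMT_compressStream_generic() above returns the minimum amount of data still to flush, and the same contract is surfaced by the public ZSTD_compressStream2(), which is the supported entry point into this multithreaded path (see the header below). A hedged usage sketch, assuming an MT-enabled libzstd linked with -lzstd:

    #include <stdio.h>
    #include <zstd.h>

    int main(void)
    {
        char const src[] = "some input to compress";
        char dst[256];
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_inBuffer in = { src, sizeof(src), 0 };
        ZSTD_outBuffer out = { dst, sizeof(dst), 0 };
        size_t remaining;

        ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4); /* enables the MT path */
        do {
            /* returns the minimum amount still to be flushed, 0 when done */
            remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
            if (ZSTD_isError(remaining)) {
                fprintf(stderr, "%s\n", ZSTD_getErrorName(remaining));
                break;
            }
        } while (remaining != 0);

        printf("compressed %zu -> %zu bytes\n", in.pos, out.pos);
        ZSTD_freeCCtx(cctx);
        return 0;
    }
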
+ * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * These APIs used to be exposed with ZSTDLIB_API, + * because it used to be the only way to invoke MT compression. + * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead. + * + * This API requires ZSTD_MULTITHREAD to be defined during compilation, + * otherwise ZSTDMT_createCCtx*() will fail. + */ + +/* === Dependencies === */ +#include "../common/zstd_deps.h" /* size_t */ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +#include "../zstd.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTDLIB_API */ + + +/* === Constants === */ +#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */ +# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256) +#endif +#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */ +# define ZSTDMT_JOBSIZE_MIN (512 KB) +#endif +#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) +#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd. Never invoke directly === + * ======================================================== */ + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem, + ZSTD_threadPool *pool); +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + +/* === Streaming functions === */ + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * mtctx can be freshly constructed or reused from a prior compression. + * If mtctx is reused, memory allocations from the prior compression may not be freed, + * even if they are not needed for the current compression. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* mtctx, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + /*! ZSTDMT_toFlushNow() + * Tell how many bytes are ready to be flushed immediately. + * Probe the oldest active job (not yet entirely flushed) and check its output buffer. + * If return 0, it means there is no active job, + * or, it means oldest job is still active, but everything produced has been flushed so far, + * therefore flushing is limited by speed of oldest job. */ +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); + +/*! 
ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/*! ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/decompress/huf_decompress.c b/c-blosc/internal-complibs/zstd-1.5.6/decompress/huf_decompress.c new file mode 100644 index 0000000..f85dd0b --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/decompress/huf_decompress.c @@ -0,0 +1,1944 @@ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ +#include "../common/compiler.h" +#include "../common/bitstream.h" /* BIT_* */ +#include "../common/fse.h" /* to compress headers */ +#include "../common/huf.h" +#include "../common/error_private.h" +#include "../common/zstd_internal.h" +#include "../common/bits.h" /* ZSTD_highbit32, ZSTD_countTrailingZeros64 */ + +/* ************************************************************** +* Constants +****************************************************************/ + +#define HUF_DECODER_FAST_TABLELOG 11 + +/* ************************************************************** +* Macros +****************************************************************/ + +#ifdef HUF_DISABLE_FAST_DECODE +# define HUF_ENABLE_FAST_DECODE 0 +#else +# define HUF_ENABLE_FAST_DECODE 1 +#endif + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif + +/* When DYNAMIC_BMI2 is enabled, fast decoders are only called when bmi2 is + * supported at runtime, so we can add the BMI2 target attribute. + * When it is disabled, we will still get BMI2 if it is enabled statically. 
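
The comment above describes how the decoder compiles the same loop twice, once with the BMI2 target attribute, and picks a variant at runtime; the HUF_DGEN macro below stamps out exactly such pairs. A minimal sketch of the technique, assuming x86 GCC or Clang (the names and the feature flag are illustrative):

    #include <stdio.h>

    /* One body, two compiled variants, runtime selection. */
    static int work_body(int x) { return x * 2; }

    #define GEN_VARIANTS(fn)                                        \
        static int fn##_default(int x) { return fn##_body(x); }    \
        __attribute__((target("bmi2")))                             \
        static int fn##_bmi2(int x) { return fn##_body(x); }       \
        static int fn(int x, int hasBmi2)                           \
        { return hasBmi2 ? fn##_bmi2(x) : fn##_default(x); }

    GEN_VARIANTS(work)

    int main(void)
    {
        printf("%d %d\n", work(21, 0), work(21, 1));
        return 0;
    }
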
+ */ +#if DYNAMIC_BMI2 +# define HUF_FAST_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE +#else +# define HUF_FAST_BMI2_ATTRS +#endif + +#ifdef __cplusplus +# define HUF_EXTERN_C extern "C" +#else +# define HUF_EXTERN_C +#endif +#define HUF_ASM_DECL HUF_EXTERN_C + +#if DYNAMIC_BMI2 +# define HUF_NEED_BMI2_FUNCTION 1 +#else +# define HUF_NEED_BMI2_FUNCTION 0 +#endif + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +typedef size_t (*HUF_DecompressUsingDTableFn)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int flags) \ + { \ + if (flags & HUF_flags_bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int flags) \ + { \ + (void)flags; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + +static size_t HUF_initFastDStream(BYTE const* ip) { + BYTE const lastByte = ip[7]; + size_t const bitsConsumed = lastByte ? 8 - ZSTD_highbit32(lastByte) : 0; + size_t const value = MEM_readLEST(ip) | 1; + assert(bitsConsumed <= 8); + assert(sizeof(size_t) == 8); + return value << bitsConsumed; +} + + +/** + * The input/output arguments to the Huffman fast decoding loop: + * + * ip [in/out] - The input pointers, must be updated to reflect what is consumed. + * op [in/out] - The output pointers, must be updated to reflect what is written. + * bits [in/out] - The bitstream containers, must be updated to reflect the current state. + * dt [in] - The decoding table. + * ilowest [in] - The beginning of the valid range of the input. Decoders may read + * down to this pointer. It may be below iend[0]. + * oend [in] - The end of the output stream. op[3] must not cross oend. + * iend [in] - The end of each input stream. ip[i] may cross iend[i], + * as long as it is above ilowest, but that indicates corruption. 
+ */ +typedef struct { + BYTE const* ip[4]; + BYTE* op[4]; + U64 bits[4]; + void const* dt; + BYTE const* ilowest; + BYTE* oend; + BYTE const* iend[4]; +} HUF_DecompressFastArgs; + +typedef void (*HUF_DecompressFastLoopFn)(HUF_DecompressFastArgs*); + +/** + * Initializes args for the fast decoding loop. + * @returns 1 on success + * 0 if the fallback implementation should be used. + * Or an error code on failure. + */ +static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable) +{ + void const* dt = DTable + 1; + U32 const dtLog = HUF_getDTableDesc(DTable).tableLog; + + const BYTE* const istart = (const BYTE*)src; + + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); + + /* The fast decoding loop assumes 64-bit little-endian. + * This condition is false on x32. + */ + if (!MEM_isLittleEndian() || MEM_32bits()) + return 0; + + /* Avoid nullptr addition */ + if (dstSize == 0) + return 0; + assert(dst != NULL); + + /* strict minimum : jump table + 1 byte per stream */ + if (srcSize < 10) + return ERROR(corruption_detected); + + /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers. + * If table log is not correct at this point, fallback to the old decoder. + * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder. + */ + if (dtLog != HUF_DECODER_FAST_TABLELOG) + return 0; + + /* Read the jump table. */ + { + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = srcSize - (length1 + length2 + length3 + 6); + args->iend[0] = istart + 6; /* jumpTable */ + args->iend[1] = args->iend[0] + length1; + args->iend[2] = args->iend[1] + length2; + args->iend[3] = args->iend[2] + length3; + + /* HUF_initFastDStream() requires this, and this small of an input + * won't benefit from the ASM loop anyways. + */ + if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8) + return 0; + if (length4 > srcSize) return ERROR(corruption_detected); /* overflow */ + } + /* ip[] contains the position that is currently loaded into bits[]. */ + args->ip[0] = args->iend[1] - sizeof(U64); + args->ip[1] = args->iend[2] - sizeof(U64); + args->ip[2] = args->iend[3] - sizeof(U64); + args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64); + + /* op[] contains the output pointers. */ + args->op[0] = (BYTE*)dst; + args->op[1] = args->op[0] + (dstSize+3)/4; + args->op[2] = args->op[1] + (dstSize+3)/4; + args->op[3] = args->op[2] + (dstSize+3)/4; + + /* No point to call the ASM loop for tiny outputs. */ + if (args->op[3] >= oend) + return 0; + + /* bits[] is the bit container. + * It is read from the MSB down to the LSB. + * It is shifted left as it is read, and zeros are + * shifted in. After the lowest valid bit a 1 is + * set, so that CountTrailingZeros(bits[]) can be used + * to count how many bits we've consumed. + */ + args->bits[0] = HUF_initFastDStream(args->ip[0]); + args->bits[1] = HUF_initFastDStream(args->ip[1]); + args->bits[2] = HUF_initFastDStream(args->ip[2]); + args->bits[3] = HUF_initFastDStream(args->ip[3]); + + /* The decoders must be sure to never read beyond ilowest. + * This is lower than iend[0], but allowing decoders to read + * down to ilowest can allow an extra iteration or two in the + * fast loop. 
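
The bits[] comment above, together with HUF_initFastDStream() earlier, relies on a sentinel trick: the container is OR'ed with 1 so a lone bit trails the payload, and after any number of left shifts the count of trailing zeros equals the number of bits consumed. A small sketch, assuming GCC/Clang's __builtin_ctzll:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t bits = (0x0123456789ABCDEFULL | 1);  /* sentinel at bit 0 */
        int consumed = 0;

        /* consume 11, then 7 bits, as a decoder would (shift left, zeros in) */
        bits <<= 11; consumed += 11;
        bits <<= 7;  consumed += 7;

        /* the sentinel 1 has moved up; trailing zeros == bits consumed */
        assert(__builtin_ctzll(bits) == consumed);
        return 0;
    }
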
+ */ + args->ilowest = istart; + + args->oend = oend; + args->dt = dt; + + return 1; +} + +static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArgs const* args, int stream, BYTE* segmentEnd) +{ + /* Validate that we haven't overwritten. */ + if (args->op[stream] > segmentEnd) + return ERROR(corruption_detected); + /* Validate that we haven't read beyond iend[]. + * Note that ip[] may be < iend[] because the MSB is + * the next bit to read, and we may have consumed 100% + * of the stream, so down to iend[i] - 8 is valid. + */ + if (args->ip[stream] < args->iend[stream] - 8) + return ERROR(corruption_detected); + + /* Construct the BIT_DStream_t. */ + assert(sizeof(size_t) == 8); + bit->bitContainer = MEM_readLEST(args->ip[stream]); + bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]); + bit->start = (const char*)args->ilowest; + bit->limitPtr = bit->start + sizeof(size_t); + bit->ptr = (const char*)args->ip[stream]; + + return 0; +} + +/* Calls X(N) for each stream 0, 1, 2, 3. */ +#define HUF_4X_FOR_EACH_STREAM(X) \ + do { \ + X(0); \ + X(1); \ + X(2); \ + X(3); \ + } while (0) + +/* Calls X(N, var) for each stream 0, 1, 2, 3. */ +#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \ + do { \ + X(0, (var)); \ + X(1, (var)); \ + X(2, (var)); \ + X(3, (var)); \ + } while (0) + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = (U64)((symbol << 8) + nbBits); + } else { + D4 = (U64)(symbol + (nbBits << 8)); + } + assert(D4 < (1U << 16)); + D4 *= 0x0001000100010001ULL; + return D4; +} + +/** + * Increase the tableLog to targetTableLog and rescales the stats. + * If tableLog > targetTableLog this is a no-op. + * @returns New tableLog + */ +static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog) +{ + if (tableLog > targetTableLog) + return tableLog; + if (tableLog < targetTableLog) { + U32 const scale = targetTableLog - tableLog; + U32 s; + /* Increase the weight for all non-zero probability symbols by scale. */ + for (s = 0; s < nbSymbols; ++s) { + huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale); + } + /* Update rankVal to reflect the new weights. + * All weights except 0 get moved to weight + scale. + * Weights [1, scale] are empty. 
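
HUF_DEltX1_set4() above writes four identical 2-byte table entries in one store by replicating a 16-bit pattern with a single multiply. A standalone sketch of the trick, assuming a little-endian host as the fast path does:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        uint8_t const symbol = 'A', nbBits = 5;
        /* little-endian layout of {nbBits, byte}: nbBits in the low byte */
        uint64_t const one = (uint64_t)((symbol << 8) + nbBits);
        uint64_t const four = one * 0x0001000100010001ULL;  /* replicate x4 */
        uint16_t entries[4];

        memcpy(entries, &four, sizeof(entries));
        assert(entries[0] == entries[3]);
        assert((entries[2] & 0xFF) == nbBits);
        assert((entries[2] >> 8) == symbol);
        return 0;
    }
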
+ */ + for (s = targetTableLog; s > scale; --s) { + rankVal[s] = rankVal[s - scale]; + } + for (s = scale; s > 0; --s) { + rankVal[s] = 0; + } + } + return targetTableLog; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int flags) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), flags); + if (HUF_isError(iSize)) return iSize; + + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog + 1; + U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG); + tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { int n; + U32 nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outer loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
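
The symbol-ordering step described above is a counting sort: rankVal counts symbols per weight, a prefix sum turns the counts into start offsets (rankStart), and a final pass scatters each symbol into its slot. The same algorithm in a self-contained form:

    #include <stdio.h>

    #define NB_SYMBOLS 8
    #define MAX_WEIGHT 3

    int main(void)
    {
        int const weight[NB_SYMBOLS] = { 1, 0, 2, 1, 3, 2, 1, 0 };
        int count[MAX_WEIGHT + 1] = { 0 };
        int start[MAX_WEIGHT + 1];
        int sorted[NB_SYMBOLS];
        int s, w, acc = 0;

        for (s = 0; s < NB_SYMBOLS; ++s) count[weight[s]]++;  /* like rankVal */
        for (w = 0; w <= MAX_WEIGHT; ++w) {                   /* like rankStart */
            start[w] = acc;
            acc += count[w];
        }
        for (s = 0; s < NB_SYMBOLS; ++s)                      /* scatter */
            sorted[start[weight[s]]++] = s;

        for (s = 0; s < NB_SYMBOLS; ++s) printf("%d ", sorted[s]);
        printf("\n");  /* symbols grouped by increasing weight */
        return 0;
    }
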
+     */
+    {   U32 w;
+        int symbol = wksp->rankVal[0];
+        int rankStart = 0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
+    return iSize;
+}
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+    do { \
+        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+    } while (0)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+    do { \
+        if (MEM_64bits()) \
+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+    } while (0)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    if ((pEnd - p) > 3) {
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+        }
+    } else {
+        BIT_reloadDStream(bitDPtr);
+    }
+
+    /* [0-3] symbols remaining */
+    if (MEM_32bits())
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+    return (size_t)(pEnd-pStart);
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+    void* dst, size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
+    const void* dtPtr = DTable + 1;
+    const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+    BIT_DStream_t bitD;
+    DTableDesc const dtd = HUF_getDTableDesc(DTable);
+    U32 const dtLog = dtd.tableLog;
+
+    CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+    HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+    if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+    return dstSize;
+}
+
+/* HUF_decompress4X1_usingDTable_internal_body():
+ * Conditions :
+ * @dstSize >= 6
+ */
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+    void* dst, size_t
dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ + assert(dstSize >= 6); /* validated above */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + if ((size_t)(oend - op4) >= sizeof(size_t)) { + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. 
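
The 4-stream layout used above packs three little-endian 16-bit stream lengths into a 6-byte jump table; the fourth length is whatever remains, and an overflow check guards the subtraction. A sketch of recovering the boundaries (the buffer contents are hypothetical):

    #include <stdint.h>
    #include <stdio.h>

    static size_t readLE16(uint8_t const* p)
    { return (size_t)p[0] | ((size_t)p[1] << 8); }

    int main(void)
    {
        /* jump table says streams 1-3 are 3, 4, 5 bytes; 2 payload bytes remain */
        uint8_t buf[6 + 14] = { 3, 0, 4, 0, 5, 0 };
        size_t const srcSize = sizeof(buf);
        size_t const len1 = readLE16(buf);
        size_t const len2 = readLE16(buf + 2);
        size_t const len3 = readLE16(buf + 4);

        if (len1 + len2 + len3 + 6 > srcSize) { puts("corrupt"); return 1; }
        printf("streams: %zu %zu %zu %zu bytes\n",
               len1, len2, len3, srcSize - (len1 + len2 + len3 + 6));
        return 0;
    }
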
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +#if HUF_NEED_BMI2_FUNCTION +static BMI2_TARGET_ATTRIBUTE +size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} +#endif + +static +size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable) { + return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable); +} + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 + +HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN; + +#endif + +static HUF_FAST_BMI2_ATTRS +void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args) +{ + U64 bits[4]; + BYTE const* ip[4]; + BYTE* op[4]; + U16 const* const dtable = (U16 const*)args->dt; + BYTE* const oend = args->oend; + BYTE const* const ilowest = args->ilowest; + + /* Copy the arguments to local variables */ + ZSTD_memcpy(&bits, &args->bits, sizeof(bits)); + ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip)); + ZSTD_memcpy(&op, &args->op, sizeof(op)); + + assert(MEM_isLittleEndian()); + assert(!MEM_32bits()); + + for (;;) { + BYTE* olimit; + int stream; + + /* Assert loop preconditions */ +#ifndef NDEBUG + for (stream = 0; stream < 4; ++stream) { + assert(op[stream] <= (stream == 3 ? oend : op[stream + 1])); + assert(ip[stream] >= ilowest); + } +#endif + /* Compute olimit */ + { + /* Each iteration produces 5 output symbols per stream */ + size_t const oiters = (size_t)(oend - op[3]) / 5; + /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes + * per stream. + */ + size_t const iiters = (size_t)(ip[0] - ilowest) / 7; + /* We can safely run iters iterations before running bounds checks */ + size_t const iters = MIN(oiters, iiters); + size_t const symbols = iters * 5; + + /* We can simply check that op[3] < olimit, instead of checking all + * of our bounds, since we can't hit the other bounds until we've run + * iters iterations, which only happens when op[3] == olimit. + */ + olimit = op[3] + symbols; + + /* Exit fast decoding loop once we reach the end. */ + if (op[3] == olimit) + break; + + /* Exit the decoding loop if any input pointer has crossed the + * previous one. This indicates corruption, and a precondition + * to our loop is that ip[i] >= ip[0]. 
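
The olimit computation above is the key to the fast loop: it bounds, in advance, how many 5-symbol iterations are safe on both the output side (5 bytes per stream per iteration) and the input side (at most 7 bytes per stream per iteration), so no per-symbol bounds checks are needed inside the loop. The same budget arithmetic in isolation (the sizes are made up):

    #include <stdio.h>

    #define MIN(a, b) ((a) < (b) ? (a) : (b))

    int main(void)
    {
        size_t const outLeft = 103; /* bytes left in the slowest output stream */
        size_t const inLeft  = 64;  /* bytes between ip[0] and the input floor */
        size_t const oiters = outLeft / 5;  /* 5 symbols produced per iteration */
        size_t const iiters = inLeft / 7;   /* <= 7 bytes consumed per iteration */
        size_t const iters  = MIN(oiters, iiters);

        printf("can run %zu iterations (%zu symbols) without bounds checks\n",
               iters, iters * 5);
        return 0;
    }
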
+ */ + for (stream = 1; stream < 4; ++stream) { + if (ip[stream] < ip[stream - 1]) + goto _out; + } + } + +#ifndef NDEBUG + for (stream = 1; stream < 4; ++stream) { + assert(ip[stream] >= ip[stream - 1]); + } +#endif + +#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol) \ + do { \ + int const index = (int)(bits[(_stream)] >> 53); \ + int const entry = (int)dtable[index]; \ + bits[(_stream)] <<= (entry & 0x3F); \ + op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \ + } while (0) + +#define HUF_4X1_RELOAD_STREAM(_stream) \ + do { \ + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ + int const nbBits = ctz & 7; \ + int const nbBytes = ctz >> 3; \ + op[(_stream)] += 5; \ + ip[(_stream)] -= nbBytes; \ + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ + bits[(_stream)] <<= nbBits; \ + } while (0) + + /* Manually unroll the loop because compilers don't consistently + * unroll the inner loops, which destroys performance. + */ + do { + /* Decode 5 symbols in each of the 4 streams */ + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4); + + /* Reload each of the 4 the bitstreams */ + HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM); + } while (op[3] < olimit); + +#undef HUF_4X1_DECODE_SYMBOL +#undef HUF_4X1_RELOAD_STREAM + } + +_out: + + /* Save the final values of each of the state variables back to args. */ + ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); + ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); + ZSTD_memcpy(&args->op, &op, sizeof(op)); +} + +/** + * @returns @p dstSize on success (>= 6) + * 0 if the fallback implementation should be used + * An error if an error occurred + */ +static HUF_FAST_BMI2_ATTRS +size_t +HUF_decompress4X1_usingDTable_internal_fast( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable, + HUF_DecompressFastLoopFn loopFn) +{ + void const* dt = DTable + 1; + BYTE const* const ilowest = (BYTE const*)cSrc; + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); + HUF_DecompressFastArgs args; + { size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init fast loop args"); + if (ret == 0) + return 0; + } + + assert(args.ip[0] >= args.ilowest); + loopFn(&args); + + /* Our loop guarantees that ip[] >= ilowest and that we haven't + * overwritten any op[]. + */ + assert(args.ip[0] >= ilowest); + assert(args.ip[0] >= ilowest); + assert(args.ip[1] >= ilowest); + assert(args.ip[2] >= ilowest); + assert(args.ip[3] >= ilowest); + assert(args.op[3] <= oend); + + assert(ilowest == args.ilowest); + assert(ilowest + 6 == args.iend[0]); + (void)ilowest; + + /* finish bit streams one by one. */ + { size_t const segmentSize = (dstSize+3) / 4; + BYTE* segmentEnd = (BYTE*)dst; + int i; + for (i = 0; i < 4; ++i) { + BIT_DStream_t bit; + if (segmentSize <= (size_t)(oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + /* Decompress and validate that we've produced exactly the expected length. 
*/ + args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) return ERROR(corruption_detected); + } + } + + /* decoded size */ + assert(dstSize != 0); + return dstSize; +} + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) + +static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable, int flags) +{ + HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X1_usingDTable_internal_default; + HUF_DecompressFastLoopFn loopFn = HUF_decompress4X1_usingDTable_internal_fast_c_loop; + +#if DYNAMIC_BMI2 + if (flags & HUF_flags_bmi2) { + fallbackFn = HUF_decompress4X1_usingDTable_internal_bmi2; +# if ZSTD_ENABLE_ASM_X86_64_BMI2 + if (!(flags & HUF_flags_disableAsm)) { + loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; + } +# endif + } else { + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); + } +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + if (!(flags & HUF_flags_disableAsm)) { + loopFn = HUF_decompress4X1_usingDTable_internal_fast_asm_loop; + } +#endif + + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { + size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); + if (ret != 0) + return ret; + } + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); +} + +static size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int flags) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); +} + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + +/** + * Constructs a HUF_DEltX2 in a U32. + */ +static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level) +{ + U32 seq; + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2); + DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3); + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32)); + if (MEM_isLittleEndian()) { + seq = level == 1 ? symbol : (baseSeq + (symbol << 8)); + return seq + (nbBits << 16) + ((U32)level << 24); + } else { + seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol); + return (seq << 16) + (nbBits << 8) + (U32)level; + } +} + +/** + * Constructs a HUF_DEltX2. + */ +static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level) +{ + HUF_DEltX2 DElt; + U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val)); + ZSTD_memcpy(&DElt, &val, sizeof(val)); + return DElt; +} + +/** + * Constructs 2 HUF_DEltX2s and packs them into a U64. 
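
HUF_decompress4X1_usingDTable_internal() above layers its implementations: the fast path may return 0 to decline an input (too small, wrong table log, and so on), in which case the caller falls back rather than treating 0 as an error. A minimal sketch of that convention (the functions are hypothetical):

    #include <stdio.h>

    /* fast path returns 0 to decline an input; 0 is not an error here */
    static size_t fastPath(int input) { return (input % 2 == 0) ? (size_t)input : 0; }
    static size_t fallback(int input) { return (size_t)input; }

    static size_t dispatch(int input)
    {
        size_t const ret = fastPath(input);
        if (ret != 0)
            return ret;          /* fast path handled it */
        return fallback(input);  /* 0 meant "decline": take the general path */
    }

    int main(void)
    {
        printf("%zu %zu\n", dispatch(8), dispatch(7));
        return 0;
    }
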
+ */ +static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level) +{ + U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level); + return (U64)DElt + ((U64)DElt << 32); +} + +/** + * Fills the DTable rank with all the symbols from [begin, end) that are each + * nbBits long. + * + * @param DTableRank The start of the rank in the DTable. + * @param begin The first symbol to fill (inclusive). + * @param end The last symbol to fill (exclusive). + * @param nbBits Each symbol is nbBits long. + * @param tableLog The table log. + * @param baseSeq If level == 1 { 0 } else { the first level symbol } + * @param level The level in the table. Must be 1 or 2. + */ +static void HUF_fillDTableX2ForWeight( + HUF_DEltX2* DTableRank, + sortedSymbol_t const* begin, sortedSymbol_t const* end, + U32 nbBits, U32 tableLog, + U16 baseSeq, int const level) +{ + U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */); + const sortedSymbol_t* ptr; + assert(level >= 1 && level <= 2); + switch (length) { + case 1: + for (ptr = begin; ptr != end; ++ptr) { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + *DTableRank++ = DElt; + } + break; + case 2: + for (ptr = begin; ptr != end; ++ptr) { + HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level); + DTableRank[0] = DElt; + DTableRank[1] = DElt; + DTableRank += 2; + } + break; + case 4: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + DTableRank += 4; + } + break; + case 8: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + DTableRank += 8; + } + break; + default: + for (ptr = begin; ptr != end; ++ptr) { + U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level); + HUF_DEltX2* const DTableRankEnd = DTableRank + length; + for (; DTableRank != DTableRankEnd; DTableRank += 8) { + ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2)); + } + } + break; + } +} + +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits, + const U32* rankVal, const int minWeight, const int maxWeight1, + const sortedSymbol_t* sortedSymbols, U32 const* rankStart, + U32 nbBitsBaseline, U16 baseSeq) +{ + /* Fill skipped values (all positions up to rankVal[minWeight]). + * These are positions only get a single symbol because the combined weight + * is too large. 
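
HUF_buildDEltX2U32() above packs a two-symbol sequence, the bit count, and the entry length into a 32-bit word whose byte order matches the HUF_DEltX2 struct layout. A little-endian round-trip sketch (the struct here is a local stand-in, not the library's):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    typedef struct { uint16_t sequence; uint8_t nbBits; uint8_t length; } DEltX2;

    int main(void)
    {
        uint32_t const symbol = 'B', baseSeq = 'A', nbBits = 7, level = 2;
        /* little-endian packing, as in the level==2 branch above */
        uint32_t const val = (baseSeq + (symbol << 8)) + (nbBits << 16) + (level << 24);
        DEltX2 d;

        memcpy(&d, &val, sizeof(d));
        assert(d.sequence == (uint16_t)(baseSeq + (symbol << 8)));  /* "AB" pair */
        assert(d.nbBits == nbBits);
        assert(d.length == level);
        return 0;
    }
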
+ */ + if (minWeight>1) { + U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */); + U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1); + int const skipSize = rankVal[minWeight]; + assert(length > 1); + assert((U32)skipSize < length); + switch (length) { + case 2: + assert(skipSize == 1); + ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2)); + break; + case 4: + assert(skipSize <= 4); + ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2)); + break; + default: + { + int i; + for (i = 0; i < skipSize; i += 8) { + ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2)); + ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2)); + } + } + } + } + + /* Fill each of the second level symbols by weight. */ + { + int w; + for (w = minWeight; w < maxWeight1; ++w) { + int const begin = rankStart[w]; + int const end = rankStart[w+1]; + U32 const nbBits = nbBitsBaseline - w; + U32 const totalBits = nbBits + consumedBits; + HUF_fillDTableX2ForWeight( + DTable + rankVal[w], + sortedSymbols + begin, sortedSymbols + end, + totalBits, targetLog, + baseSeq, /* level */ 2); + } + } +} + +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, + const U32* rankStart, rankValCol_t* rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline) +{ + U32* const rankVal = rankValOrigin[0]; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + int w; + int const wEnd = (int)maxWeight + 1; + + /* Fill DTable in order of weight. */ + for (w = 1; w < wEnd; ++w) { + int const begin = (int)rankStart[w]; + int const end = (int)rankStart[w+1]; + U32 const nbBits = nbBitsBaseline - w; + + if (targetLog-nbBits >= minBits) { + /* Enough room for a second symbol. */ + int start = rankVal[w]; + U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */); + int minWeight = nbBits + scaleLog; + int s; + if (minWeight < 1) minWeight = 1; + /* Fill the DTable for every symbol of weight w. + * These symbols get at least 1 second symbol. + */ + for (s = begin; s != end; ++s) { + HUF_fillDTableX2Level2( + DTable + start, targetLog, nbBits, + rankValOrigin[nbBits], minWeight, wEnd, + sortedList, rankStart, + nbBitsBaseline, sortedList[s].symbol); + start += length; + } + } else { + /* Only a single symbol. 
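+             * (targetLog - nbBits < minBits here : the bits left over after
+             * this symbol cannot hold even the shortest second code).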
*/
+            HUF_fillDTableX2ForWeight(
+                DTable + rankVal[w],
+                sortedList + begin, sortedList + end,
+                nbBits, targetLog,
+                /* baseSeq */ 0, /* level */ 1);
+        }
+    }
+}
+
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 3];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                             void* workSpace, size_t wkspSize, int flags)
+{
+    U32 tableLog, maxW, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), flags);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+    if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
+
+    /* find maxWeight */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
+
+    /* Get start index of each weight */
+    {   U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
+        rankStart[maxW+1] = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {   U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = wksp->weightList[s];
+            U32 const r = rankStart[w]++;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {   U32* const rankVal0 = wksp->rankVal[0];
+        {   int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
+        }   }
+        {   U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = wksp->rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+    }   }   }   }
+
+    HUF_fillDTableX2(dt, maxTableLog,
+                   wksp->sortedSymbol,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1);
+
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
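+    /* An X2 entry caches up to 2 decoded bytes in `sequence`; both bytes are
+     * written unconditionally and `length` (1 or 2) says how far to advance
+     * the output. A spurious 2nd byte is simply overwritten by the next
+     * symbol; the very last output position goes through
+     * HUF_decodeLastSymbolX2() below, which copies a single byte instead. */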
+ ZSTD_memcpy(op, &dt[val].sequence, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, &dt[val].sequence, 1); + if (dt[val].length==1) { + BIT_skipBits(DStream, dt[val].nbBits); + } else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } + } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + do { \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ + } while (0) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + do { \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \ + } while (0) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) { + if (dtLog <= 11 && MEM_64bits()) { + /* up to 10 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) { + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } else { + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + } + } else { + BIT_reloadDStream(bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + if ((size_t)(pEnd - p) >= 2) { + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + } + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize); + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +/* HUF_decompress4X2_usingDTable_internal_body(): + * Conditions: + * @dstSize >= 6 + */ +FORCE_INLINE_TEMPLATE size_t 
+HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + if (dstSize < 6) return ERROR(corruption_detected); /* stream 4-split doesn't work */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + if (opStart4 > oend) return ERROR(corruption_detected); /* overflow */ + assert(dstSize >= 6 /* validated above */); + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + if ((size_t)(oend - op4) >= sizeof(size_t)) { + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + 
HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+            endSignal = (U32)LIKELY((U32)
+                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+#endif
+        }
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
+
+        /* check */
+        { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+          if (!endCheck) return ERROR(corruption_detected); }
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+#if HUF_NEED_BMI2_FUNCTION
+static BMI2_TARGET_ATTRIBUTE
+size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable) {
+    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+static
+size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable) {
+    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_fast_asm_loop(HUF_DecompressFastArgs* args) ZSTDLIB_HIDDEN;
+
+#endif
+
+static HUF_FAST_BMI2_ATTRS
+void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs* args)
+{
+    U64 bits[4];
+    BYTE const* ip[4];
+    BYTE* op[4];
+    BYTE* oend[4];
+    HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
+    BYTE const* const ilowest = args->ilowest;
+
+    /* Copy the arguments to local registers. */
+    ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
+    ZSTD_memcpy((void*)(&ip), &args->ip, sizeof(ip));
+    ZSTD_memcpy(&op, &args->op, sizeof(op));
+
+    oend[0] = op[1];
+    oend[1] = op[2];
+    oend[2] = op[3];
+    oend[3] = args->oend;
+
+    assert(MEM_isLittleEndian());
+    assert(!MEM_32bits());
+
+    for (;;) {
+        BYTE* olimit;
+        int stream;
+
+        /* Assert loop preconditions */
+#ifndef NDEBUG
+        for (stream = 0; stream < 4; ++stream) {
+            assert(op[stream] <= oend[stream]);
+            assert(ip[stream] >= ilowest);
+        }
+#endif
+        /* Compute olimit */
+        {
+            /* Each loop does 5 table lookups for each of the 4 streams.
+             * Each table lookup consumes up to 11 bits of input, and produces
+             * up to 2 bytes of output.
+             */
+            /* We can consume up to 7 bytes of input per iteration per stream.
+             * We also know that each input pointer is >= ip[0]. So we can run
+             * iters loops before running out of input.
+             */
+            size_t iters = (size_t)(ip[0] - ilowest) / 7;
+            /* Each iteration can produce up to 10 bytes of output per stream.
+             * Each output stream may advance at different rates. So take the
+             * minimum number of safe iterations among all the output streams.
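+             * Example : with ip[0] sitting 700 bytes above ilowest and 250
+             * bytes of room in the tightest stream, iters = MIN(700/7, 250/10)
+             * = 25, so olimit = op[3] + 25*5 and at most 25 iterations run
+             * before the bounds are recomputed.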
+ */ + for (stream = 0; stream < 4; ++stream) { + size_t const oiters = (size_t)(oend[stream] - op[stream]) / 10; + iters = MIN(iters, oiters); + } + + /* Each iteration produces at least 5 output symbols. So until + * op[3] crosses olimit, we know we haven't executed iters + * iterations yet. This saves us maintaining an iters counter, + * at the expense of computing the remaining # of iterations + * more frequently. + */ + olimit = op[3] + (iters * 5); + + /* Exit the fast decoding loop once we reach the end. */ + if (op[3] == olimit) + break; + + /* Exit the decoding loop if any input pointer has crossed the + * previous one. This indicates corruption, and a precondition + * to our loop is that ip[i] >= ip[0]. + */ + for (stream = 1; stream < 4; ++stream) { + if (ip[stream] < ip[stream - 1]) + goto _out; + } + } + +#ifndef NDEBUG + for (stream = 1; stream < 4; ++stream) { + assert(ip[stream] >= ip[stream - 1]); + } +#endif + +#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3) \ + do { \ + if ((_decode3) || (_stream) != 3) { \ + int const index = (int)(bits[(_stream)] >> 53); \ + HUF_DEltX2 const entry = dtable[index]; \ + MEM_write16(op[(_stream)], entry.sequence); \ + bits[(_stream)] <<= (entry.nbBits) & 0x3F; \ + op[(_stream)] += (entry.length); \ + } \ + } while (0) + +#define HUF_4X2_RELOAD_STREAM(_stream) \ + do { \ + HUF_4X2_DECODE_SYMBOL(3, 1); \ + { \ + int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \ + int const nbBits = ctz & 7; \ + int const nbBytes = ctz >> 3; \ + ip[(_stream)] -= nbBytes; \ + bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1; \ + bits[(_stream)] <<= nbBits; \ + } \ + } while (0) + + /* Manually unroll the loop because compilers don't consistently + * unroll the inner loops, which destroys performance. + */ + do { + /* Decode 5 symbols from each of the first 3 streams. + * The final stream will be decoded during the reload phase + * to reduce register pressure. + */ + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0); + + /* Decode one symbol from the final stream */ + HUF_4X2_DECODE_SYMBOL(3, 1); + + /* Decode 4 symbols from the final stream & reload bitstreams. + * The final stream is reloaded last, meaning that all 5 symbols + * are decoded from the final stream before it is reloaded. + */ + HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM); + } while (op[3] < olimit); + } + +#undef HUF_4X2_DECODE_SYMBOL +#undef HUF_4X2_RELOAD_STREAM + +_out: + + /* Save the final values of each of the state variables back to args. 
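+ * The wrapper below re-initializes a BIT_DStream_t from these values
+ * (HUF_initRemainingDStream) and lets the generic HUF_decodeStreamX2()
+ * loop finish the tail of each segment.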
*/ + ZSTD_memcpy(&args->bits, &bits, sizeof(bits)); + ZSTD_memcpy((void*)(&args->ip), &ip, sizeof(ip)); + ZSTD_memcpy(&args->op, &op, sizeof(op)); +} + + +static HUF_FAST_BMI2_ATTRS size_t +HUF_decompress4X2_usingDTable_internal_fast( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable, + HUF_DecompressFastLoopFn loopFn) { + void const* dt = DTable + 1; + const BYTE* const ilowest = (const BYTE*)cSrc; + BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize); + HUF_DecompressFastArgs args; + { + size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable); + FORWARD_IF_ERROR(ret, "Failed to init asm args"); + if (ret == 0) + return 0; + } + + assert(args.ip[0] >= args.ilowest); + loopFn(&args); + + /* note : op4 already verified within main loop */ + assert(args.ip[0] >= ilowest); + assert(args.ip[1] >= ilowest); + assert(args.ip[2] >= ilowest); + assert(args.ip[3] >= ilowest); + assert(args.op[3] <= oend); + + assert(ilowest == args.ilowest); + assert(ilowest + 6 == args.iend[0]); + (void)ilowest; + + /* finish bitStreams one by one */ + { + size_t const segmentSize = (dstSize+3) / 4; + BYTE* segmentEnd = (BYTE*)dst; + int i; + for (i = 0; i < 4; ++i) { + BIT_DStream_t bit; + if (segmentSize <= (size_t)(oend - segmentEnd)) + segmentEnd += segmentSize; + else + segmentEnd = oend; + FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption"); + args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG); + if (args.op[i] != segmentEnd) + return ERROR(corruption_detected); + } + } + + /* decoded size */ + return dstSize; +} + +static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc, + size_t cSrcSize, HUF_DTable const* DTable, int flags) +{ + HUF_DecompressUsingDTableFn fallbackFn = HUF_decompress4X2_usingDTable_internal_default; + HUF_DecompressFastLoopFn loopFn = HUF_decompress4X2_usingDTable_internal_fast_c_loop; + +#if DYNAMIC_BMI2 + if (flags & HUF_flags_bmi2) { + fallbackFn = HUF_decompress4X2_usingDTable_internal_bmi2; +# if ZSTD_ENABLE_ASM_X86_64_BMI2 + if (!(flags & HUF_flags_disableAsm)) { + loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; + } +# endif + } else { + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); + } +#endif + +#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__) + if (!(flags & HUF_flags_disableAsm)) { + loopFn = HUF_decompress4X2_usingDTable_internal_fast_asm_loop; + } +#endif + + if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) { + size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn); + if (ret != 0) + return ret; + } + return fallbackFn(dst, dstSize, cSrc, cSrcSize, DTable); +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int flags) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize, flags); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, flags); +} + +static size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + 
void* workSpace, size_t wkspSize, int flags)
+{
+    const BYTE* ip = (const BYTE*) cSrc;
+
+    size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
+                                         workSpace, wkspSize, flags);
+    if (HUF_isError(hSize)) return hSize;
+    if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += hSize; cSrcSize -= hSize;
+
+    return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags);
+}
+
+#endif /* HUF_FORCE_DECOMPRESS_X1 */
+
+
+/* ***********************************/
+/* Universal decompression selectors */
+/* ***********************************/
+
+
+#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
+typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
+static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
+{
+    /* single, double */
+    {{0,0}, {1,1}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}},  /* Q==1 : impossible */
+    {{ 150,216}, { 381,119}},   /* Q == 2 : 12-18% */
+    {{ 170,205}, { 514,112}},   /* Q == 3 : 18-25% */
+    {{ 177,199}, { 539,110}},   /* Q == 4 : 25-32% */
+    {{ 197,194}, { 644,107}},   /* Q == 5 : 32-38% */
+    {{ 221,192}, { 735,107}},   /* Q == 6 : 38-44% */
+    {{ 256,189}, { 881,106}},   /* Q == 7 : 44-50% */
+    {{ 359,188}, {1167,109}},   /* Q == 8 : 50-56% */
+    {{ 582,187}, {1570,114}},   /* Q == 9 : 56-62% */
+    {{ 688,187}, {1712,122}},   /* Q ==10 : 62-69% */
+    {{ 825,186}, {1965,136}},   /* Q ==11 : 69-75% */
+    {{ 976,185}, {2131,150}},   /* Q ==12 : 75-81% */
+    {{1180,186}, {2070,175}},   /* Q ==13 : 81-87% */
+    {{1377,185}, {1731,202}},   /* Q ==14 : 87-93% */
+    {{1412,185}, {1695,202}},   /* Q ==15 : 93-99% */
+};
+#endif
+
+/** HUF_selectDecoder() :
+ *  Tells which decoder is likely to decode faster,
+ *  based on a set of pre-computed metrics.
+ * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
+ *  Assumption : 0 < dstSize <= 128 KB */
+U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
+{
+    assert(dstSize > 0);
+    assert(dstSize <= 128*1024);
+#if defined(HUF_FORCE_DECOMPRESS_X1)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 0;
+#elif defined(HUF_FORCE_DECOMPRESS_X2)
+    (void)dstSize;
+    (void)cSrcSize;
+    return 1;
+#else
+    /* decoder timing evaluation */
+    {   U32 const Q = (cSrcSize >= dstSize) ?
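+                    /* Q buckets the compression ratio into 16 levels ;
+                     * an incompressible input (cSrc >= dst) clamps to 15 */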
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 5; /* small advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int flags) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize, flags); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize, flags); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, flags); +} +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int flags) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, flags); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int flags) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags) : + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, flags); +#endif + } +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.c b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.c new file mode 100644 index 0000000..309ec0d --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.c @@ -0,0 +1,244 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/huf.h" +#include "zstd_decompress_internal.h" +#include "zstd_ddict.h" + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +# include "../legacy/zstd_legacy.h" +#endif + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if 
((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. 
+ *  Note : dictionaries created by reference (using ZSTD_dlm_byRef) are smaller */
+size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
+{
+    return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
+}
+
+size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;   /* support sizeof on NULL */
+    return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
+}
+
+/*! ZSTD_getDictID_fromDDict() :
+ *  Provides the dictID of the dictionary loaded into `ddict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
+{
+    if (ddict==NULL) return 0;
+    return ddict->dictID;
+}
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.h b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.h
new file mode 100644
index 0000000..c4ca887
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_ddict.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#ifndef ZSTD_DDICT_H
+#define ZSTD_DDICT_H
+
+/*-*******************************************************
+ *  Dependencies
+ *********************************************************/
+#include "../common/zstd_deps.h"   /* size_t */
+#include "../zstd.h"     /* ZSTD_DDict, and several public functions */
+
+
+/*-*******************************************************
+ *  Interface
+ *********************************************************/
+
+/* note: several prototypes are already published in `zstd.h` :
+ * ZSTD_createDDict()
+ * ZSTD_createDDict_byReference()
+ * ZSTD_createDDict_advanced()
+ * ZSTD_freeDDict()
+ * ZSTD_initStaticDDict()
+ * ZSTD_sizeof_DDict()
+ * ZSTD_estimateDDictSize()
+ * ZSTD_getDictID_fromDict()
+ */
+
+const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict);
+size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict);
+
+void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+
+
+
+#endif /* ZSTD_DDICT_H */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress.c b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress.c
new file mode 100644
index 0000000..2f03cf7
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress.c
@@ -0,0 +1,2407 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* ***************************************************************
+*  Tuning parameters
+*****************************************************************/
+/*!
+ * HEAPMODE :
+ * Select how default decompression function ZSTD_decompress() allocates its context,
+ * on stack (0), or into heap (1, default; requires malloc()).
+ * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */ +#include "../common/error_private.h" +#include "../common/zstd_internal.h" /* blockProperties_t */ +#include "../common/mem.h" /* low level memory routines */ +#include "../common/bits.h" /* ZSTD_highbit32 */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/huf.h" +#include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */ +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +# include "../legacy/zstd_legacy.h" +#endif + + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. 
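+ * Collisions are handled by open addressing with linear probing : the scan
+ * walks forward from the hashed slot until it finds a NULL entry or an
+ * entry carrying the same dictID. Because the table is grown before it
+ * exceeds the 3/4 load factor (see ZSTD_DDictHashSet_addDDict below), a
+ * free slot is always available.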
+ */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. 
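+ * The table starts at DDICT_HASHSET_TABLE_BASE_SIZE (64) slots and doubles
+ * (DDICT_HASHSET_RESIZE_FACTOR) on each expansion, so its size stays a power
+ * of two and `hash & (size - 1)` remains a valid index mask.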
+ */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + if (!ret) + return NULL; + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + if (!ret->ddictPtrTable) { + ZSTD_customFree(ret, customMem); + return NULL; + } + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. + */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; + dctx->disableHufAsm = 0; + dctx->maxBlockSizeParam = 0; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; +#endif + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->isFrameDecompression = 1; +#if DYNAMIC_BMI2 + dctx->bmi2 = ZSTD_cpuSupportsBmi2(); +#endif + 
dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) { + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_internal(customMem); +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_internal(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. 
+ *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
+ *  Note 3 : Skippable Frame Identifiers are considered valid. */
+unsigned ZSTD_isFrame(const void* buffer, size_t size)
+{
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if (magic == ZSTD_MAGICNUMBER) return 1;
+        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(buffer, size)) return 1;
+#endif
+    return 0;
+}
+
+/*! ZSTD_isSkippableFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ */
+unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
+{
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+    return 0;
+}
+
+/** ZSTD_frameHeaderSize_internal() :
+ *  srcSize must be large enough to reach header size fields.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
+ * @return : size of the Frame Header
+ *           or an error code, which can be tested with ZSTD_isError() */
+static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+    RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, "");
+
+    {   BYTE const fhd = ((const BYTE*)src)[minInputSize-1];
+        U32 const dictID= fhd & 3;
+        U32 const singleSegment = (fhd >> 5) & 1;
+        U32 const fcsId = fhd >> 6;
+        return minInputSize + !singleSegment
+             + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId]
+             + (singleSegment && !fcsId);
+    }
+}
+
+/** ZSTD_frameHeaderSize() :
+ *  srcSize must be >= ZSTD_frameHeaderSize_prefix.
+ * @return : size of the Frame Header,
+ *           or an error code (if srcSize is too small) */
+size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
+{
+    return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
+}
+
+
+/** ZSTD_getFrameHeader_advanced() :
+ *  decode Frame Header, or require larger `srcSize`.
+ *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
+**           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
+{
+    const BYTE* ip = (const BYTE*)src;
+    size_t const minInputSize = ZSTD_startingInputLength(format);
+
+    DEBUGLOG(5, "ZSTD_getFrameHeader_advanced: minInputSize = %zu, srcSize = %zu", minInputSize, srcSize);
+
+    if (srcSize > 0) {
+        /* note : technically could be considered an assert(), since it's an invalid entry */
+        RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter : src==NULL, but srcSize>0");
+    }
+    if (srcSize < minInputSize) {
+        if (srcSize > 0 && format != ZSTD_f_zstd1_magicless) {
+            /* when receiving less than @minInputSize bytes,
+             * check that these bytes at least correspond to a supported magic number
+             * in order to error out early if they don't.
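+             * The buffer trick below pre-fills hbuf with the expected magic,
+             * overwrites its first `srcSize` bytes with the input, and then
+             * compares : the bytes not yet received match by construction,
+             * so only the available prefix is actually tested.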
+ **/ + size_t const toCopy = MIN(4, srcSize); + unsigned char hbuf[4]; MEM_writeLE32(hbuf, ZSTD_MAGICNUMBER); + assert(src != NULL); + ZSTD_memcpy(hbuf, src, toCopy); + if ( MEM_readLE32(hbuf) != ZSTD_MAGICNUMBER ) { + /* not a zstd frame : let's check if it's a skippable frame */ + MEM_writeLE32(hbuf, ZSTD_MAGIC_SKIPPABLE_START); + ZSTD_memcpy(hbuf, src, toCopy); + if ((MEM_readLE32(hbuf) & ZSTD_MAGIC_SKIPPABLE_MASK) != ZSTD_MAGIC_SKIPPABLE_START) { + RETURN_ERROR(prefix_unknown, + "first bytes don't correspond to any supported magic number"); + } } } + return minInputSize; + } + + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers may not understand that zfhPtr will be read only if return value is zero, since they are 2 different signals */ + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: + assert(0); /* impossible */ + ZSTD_FALLTHROUGH; + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: + assert(0); /* impossible */ + ZSTD_FALLTHROUGH; + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. 
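+ *  note 2 : a minimal driving sketch (illustrative; `hdr`/`hdrSize` are a
+ *           hypothetical caller-side buffer), showing how the three kinds of
+ *           return values below are meant to be dispatched :
+ *               ZSTD_frameHeader zfh;
+ *               size_t const r = ZSTD_getFrameHeader(&zfh, hdr, hdrSize);
+ *               if (ZSTD_isError(r)) { reject the input }
+ *               else if (r > 0)      { gather r bytes total, then retry }
+ *               else                 { zfh is filled and ready to use }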
+ * @return : 0, `zfhPtr` is correctly filled,
+ *          >0, `srcSize` is too small, value is the wanted `srcSize` amount,
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
+{
+    return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
+}
+
+/** ZSTD_getFrameContentSize() :
+ *  compatible with legacy mode
+ * @return : decompressed size of the single frame pointed to by `src` if known, otherwise
+ *         - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
+ *         - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */
+unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize)
+{
+#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
+    if (ZSTD_isLegacy(src, srcSize)) {
+        unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize);
+        return ret == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : ret;
+    }
+#endif
+    {   ZSTD_frameHeader zfh;
+        if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0)
+            return ZSTD_CONTENTSIZE_ERROR;
+        if (zfh.frameType == ZSTD_skippableFrame) {
+            return 0;
+        } else {
+            return zfh.frameContentSize;
+    }   }
+}
+
+static size_t readSkippableFrameSize(void const* src, size_t srcSize)
+{
+    size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE;
+    U32 sizeU32;
+
+    RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, "");
+
+    sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE);
+    RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32,
+                    frameParameter_unsupported, "");
+    {   size_t const skippableSize = skippableHeaderSize + sizeU32;
+        RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, "");
+        return skippableSize;
+    }
+}
+
+/*! ZSTD_readSkippableFrame() :
+ *  Retrieves content of a skippable frame, and writes it to dst buffer.
+ *
+ *  The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+ *  i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.  This can be NULL if the caller is not interested
+ *  in the magicVariant.
+ *
+ *  Returns an error if destination buffer is not large enough, or if this is not a valid skippable frame.
+ *
+ * @return : number of bytes written or a ZSTD error.
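+ *
+ *  Illustrative sketch (buffer names are the caller's own, not upstream API) :
+ *      unsigned variant;
+ *      char payload[1024];
+ *      size_t const n = ZSTD_readSkippableFrame(payload, sizeof(payload),
+ *                                               &variant, src, srcSize);
+ *  on success, `payload` holds n bytes of user metadata, and the frame's
+ *  magic number was ZSTD_MAGIC_SKIPPABLE_START + variant.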
+ */ +size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, + unsigned* magicVariant, /* optional, can be NULL */ + const void* src, size_t srcSize) +{ + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + { U32 const magicNumber = MEM_readLE32(src); + size_t skippableFrameSize = readSkippableFrameSize(src, srcSize); + size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE; + + /* check input validity */ + RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, ""); + RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, ""); + + /* deliver payload */ + if (skippableContentSize > 0 && dst != NULL) + ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize); + if (magicVariant != NULL) + *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START; + return skippableContentSize; + } +} + +/** ZSTD_findDecompressedSize() : + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * note: compatible with legacy mode + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) return ZSTD_CONTENTSIZE_ERROR; + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const fcs = ZSTD_getFrameContentSize(src, srcSize); + if (fcs >= ZSTD_CONTENTSIZE_ERROR) return fcs; + + if (totalDstSize + fcs < totalDstSize) + return ZSTD_CONTENTSIZE_ERROR; /* check for overflow */ + totalDstSize += fcs; + } + /* skip to next frame */ + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) return ZSTD_CONTENTSIZE_ERROR; + assert(frameSrcSize <= srcSize); + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. 
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format) +{ + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif + + if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.nbBlocks = nbBlocks; + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? 
zfh.frameContentSize
+            : (unsigned long long)nbBlocks * zfh.blockSizeMax;
+        return frameSizeInfo;
+    }
+}
+
+static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {
+    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);
+    return frameSizeInfo.compressedSize;
+}
+
+/** ZSTD_findFrameCompressedSize() :
+ *  See docs in zstd.h
+ *  Note: compatible with legacy mode */
+size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
+{
+    return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
+}
+
+/** ZSTD_decompressBound() :
+ *  compatible with legacy mode
+ *  `src` must point to the start of a ZSTD frame or a skippable frame
+ *  `srcSize` must be at least as large as the frame it contains
+ * @return : the maximum decompressed size of the compressed source
+ */
+unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
+{
+    unsigned long long bound = 0;
+    /* Iterate over each frame */
+    while (srcSize > 0) {
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
+        size_t const compressedSize = frameSizeInfo.compressedSize;
+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+            return ZSTD_CONTENTSIZE_ERROR;
+        assert(srcSize >= compressedSize);
+        src = (const BYTE*)src + compressedSize;
+        srcSize -= compressedSize;
+        bound += decompressedBound;
+    }
+    return bound;
+}
+
+size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
+{
+    size_t margin = 0;
+    unsigned maxBlockSize = 0;
+
+    /* Iterate over each frame */
+    while (srcSize > 0) {
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
+        size_t const compressedSize = frameSizeInfo.compressedSize;
+        unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
+        ZSTD_frameHeader zfh;
+
+        FORWARD_IF_ERROR(ZSTD_getFrameHeader(&zfh, src, srcSize), "");
+        if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
+            return ERROR(corruption_detected);
+
+        if (zfh.frameType == ZSTD_frame) {
+            /* Add the frame header to our margin */
+            margin += zfh.headerSize;
+            /* Add the checksum to our margin */
+            margin += zfh.checksumFlag ? 4 : 0;
+            /* Add 3 bytes per block */
+            margin += 3 * frameSizeInfo.nbBlocks;
+
+            /* Compute the max block size */
+            maxBlockSize = MAX(maxBlockSize, zfh.blockSizeMax);
+        } else {
+            assert(zfh.frameType == ZSTD_skippableFrame);
+            /* Add the entire skippable frame size to our margin. */
+            margin += compressedSize;
+        }
+
+        assert(srcSize >= compressedSize);
+        src = (const BYTE*)src + compressedSize;
+        srcSize -= compressedSize;
+    }
+
+    /* Add the max block size back to the margin. */
+    margin += maxBlockSize;
+
+    return margin;
+}
+
+/*-*************************************************************
+ *   Frame decoding
+ ***************************************************************/
+
+/** ZSTD_insertBlock() :
+ *  insert `src` block into `dctx` history. Useful to track uncompressed blocks.
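+ *  Illustrative use (caller-managed, block-level framing) : if some blocks are
+ *  stored uncompressed next to compressed ones, each raw block must still be
+ *  registered so that later blocks can match into it, e.g. :
+ *      ZSTD_insertBlock(dctx, rawBlock, rawBlockSize);   (sketch only)
+ *  before decompressing the following blocks with the block-level API.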
*/ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memmove(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + + +/*! ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dstCapacity != 0 ? 
ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); + + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } + + /* Shrink the blockSizeMax if enabled */ + if (dctx->maxBlockSizeParam != 0) + dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam); + + /* Loop on each block */ + while (1) { + BYTE* oBlockEnd = oend; + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + if (ip >= op && ip < oBlockEnd) { + /* We are decompressing in-place. Limit the output pointer so that we + * don't overwrite the block that we are currently reading. This will + * fail decompression if the input & output pointers aren't spaced + * far enough apart. + * + * This is important to set, even when the pointers are far enough + * apart, because ZSTD_decompressBlock_internal() can decide to store + * literals in the output buffer, after the block it is decompressing. + * Since we don't want anything to overwrite our input, we have to tell + * ZSTD_decompressBlock_internal to never write past ip. + * + * See ZSTD_allocateLiteralsBuffer() for reference. + */ + oBlockEnd = op + (ip - op); + } + + switch(blockProperties.blockType) + { + case bt_compressed: + assert(dctx->isFrameDecompression == 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming); + break; + case bt_raw : + /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. 
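+             * For instance (illustrative), in the in-place setup above where
+             * ip and op share one buffer, a raw block may copy bytes over a
+             * region it is still reading from; memmove() keeps that overlap
+             * well-defined, which a plain memcpy() would not guarantee.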
*/ + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(decodedSize, "Block decompression failure"); + DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize); + if (dctx->validateChecksum) { + XXH64_update(&dctx->xxhState, op, decodedSize); + } + if (decodedSize) /* support dst = NULL,0 */ { + op += decodedSize; + } + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); + /* Allow caller to get size read */ + DEBUGLOG(4, "ZSTD_decompressFrame: decompressed frame of size %zi, consuming %zi bytes of input", op-ostart, ip - (const BYTE*)*srcPtr); + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t)(op-ostart); +} + +static +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + + { + unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize); + RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!"); + if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected, + "Frame header size does not match decoded size!"); + } + } + + assert(decodedSize <= dstCapacity); + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) { + U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + 
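+                /* Reminder (descriptive note) : skippable magic numbers span
+                 * 0x184D2A50..0x184D2A5F, i.e. ZSTD_MAGIC_SKIPPABLE_START plus
+                 * a 4-bit variant, which is why the low nibble is masked out
+                 * by ZSTD_MAGIC_SKIPPABLE_MASK in the comparison above. */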
/* skippable frame detected : skip it */ + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "invalid skippable frame"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; /* check next frame */ + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. " + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t)((BYTE*)dst - (BYTE*)dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + ZSTD_FALLTHROUGH; + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx_internal(ZSTD_defaultCMem); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed, we + * allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. 
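+ *
+ * The surrounding bufferless loop, after one of the ZSTD_decompressBegin*()
+ * calls, is driven as follows (illustrative sketch; the read step stands for
+ * whatever I/O the caller uses) :
+ *     size_t need;
+ *     while ((need = ZSTD_nextSrcSizeToDecompress(dctx)) != 0) {
+ *         read exactly `need` bytes into inBuf;
+ *         size_t const produced =
+ *             ZSTD_decompressContinue(dctx, outPos, outLeft, inBuf, need);
+ *         if (ZSTD_isError(produced)) { abort } else { outPos += produced; }
+ *     }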
+ *
+ * For blocks that can be streamed, this allows us to reduce the latency until we produce
+ * output, and avoid copying the input.
+ *
+ * @param inputSize - The total amount of input that the caller currently has.
+ */
+static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) {
+    if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock))
+        return dctx->expected;
+    if (dctx->bType != bt_raw)
+        return dctx->expected;
+    return BOUNDED(1, inputSize, dctx->expected);
+}
+
+ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
+    switch(dctx->stage)
+    {
+    default:   /* should not happen */
+        assert(0);
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_getFrameHeaderSize:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_decodeFrameHeader:
+        return ZSTDnit_frameHeader;
+    case ZSTDds_decodeBlockHeader:
+        return ZSTDnit_blockHeader;
+    case ZSTDds_decompressBlock:
+        return ZSTDnit_block;
+    case ZSTDds_decompressLastBlock:
+        return ZSTDnit_lastBlock;
+    case ZSTDds_checkChecksum:
+        return ZSTDnit_checksum;
+    case ZSTDds_decodeSkippableHeader:
+        ZSTD_FALLTHROUGH;
+    case ZSTDds_skipFrame:
+        return ZSTDnit_skippableFrame;
+    }
+}
+
+static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; }
+
+/** ZSTD_decompressContinue() :
+ *  srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress())
+ * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity`)
+ *           or an error code, which can be tested using ZSTD_isError() */
+size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize);
+    /* Sanity check */
+    RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed");
+    ZSTD_checkContinuity(dctx, dst, dstCapacity);
+
+    dctx->processedCSize += srcSize;
+
+    switch (dctx->stage)
+    {
+    case ZSTDds_getFrameHeaderSize :
+        assert(src != NULL);
+        if (dctx->format == ZSTD_f_zstd1) {  /* allows header */
+            assert(srcSize >= ZSTD_FRAMEIDSIZE);  /* to read skippable magic number */
+            if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {        /* skippable frame */
+                ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+                dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize;  /* remaining to load to get full skippable frame header */
+                dctx->stage = ZSTDds_decodeSkippableHeader;
+                return 0;
+        }   }
+        dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format);
+        if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize;
+        ZSTD_memcpy(dctx->headerBuffer, src, srcSize);
+        dctx->expected = dctx->headerSize - srcSize;
+        dctx->stage = ZSTDds_decodeFrameHeader;
+        return 0;
+
+    case ZSTDds_decodeFrameHeader:
+        assert(src != NULL);
+        ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize);
+        FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), "");
+        dctx->expected = ZSTD_blockHeaderSize;
+        dctx->stage = ZSTDds_decodeBlockHeader;
+        return 0;
+
+    case ZSTDds_decodeBlockHeader:
+        {   blockProperties_t bp;
+            size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+            if (ZSTD_isError(cBlockSize)) return cBlockSize;
+            RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum");
+            dctx->expected = cBlockSize;
+            dctx->bType = bp.blockType;
+            dctx->rleSize = bp.origSize;
+            if (cBlockSize) {
+                dctx->stage =
bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + assert(dctx->isFrameDecompression == 1); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
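+             * (Descriptive note : only bt_raw blocks reach this point with
+             * dctx->expected still > 0; each partial call consumed srcSize
+             * input bytes, emitted rSize bytes, and `expected` counts what
+             * remains of the raw block.)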
*/ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + assert(dctx->format != ZSTD_f_zstd1_magicless); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
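+ *  Layout reminder, matching the parsing order below (zstd dictionary format) :
+ *      magic (4 bytes) | dictID (4 bytes) | Huffman literals table |
+ *      offcode FSE | matchLength FSE | litLength FSE |
+ *      3 repcodes (12 bytes) | dictionary content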
+ * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize, /* flags */ 0); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, (size_t)(dictEnd - dictPtr), + workspace, workspaceSize, /* flags */ 0); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* 
bmi2 */ 0); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return (size_t)(dictPtr - (const BYTE*)dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0; +#endif + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + dctx->isFrameDecompression = 1; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. 
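+ *  e.g. (illustrative) :
+ *      unsigned const id = ZSTD_getDictID_fromDict(dictBuf, dictBufSize);
+ *      id == 0  : `dictBuf` lacks the ZSTD_MAGIC_DICTIONARY header and will be
+ *                 loaded as raw content; a non-zero id can be compared with
+ *                 ZSTD_getDictID_fromFrame() on incoming frames.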
*/
+unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
+}
+
+/*! ZSTD_getDictID_fromFrame() :
+ *  Provides the dictID required to decompress frame stored within `src`.
+ *  If @return == 0, the dictID could not be decoded.
+ *  This could be for one of the following reasons :
+ *  - The frame does not require a dictionary (most common case).
+ *  - The frame was built with dictID intentionally removed.
+ *    Needed dictionary is a hidden piece of information.
+ *    Note : this use case also happens when using a non-conformant dictionary.
+ *  - `srcSize` is too small, and as a result, frame header could not be decoded.
+ *    Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`.
+ *  - This is not a Zstandard frame.
+ *  When identifying the exact failure cause, it's possible to use
+ *  ZSTD_getFrameHeader(), which will provide a more precise error code. */
+unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize)
+{
+    ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0, 0, 0 };
+    size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize);
+    if (ZSTD_isError(hError)) return 0;
+    return zfp.dictID;
+}
+
+
+/*! ZSTD_decompress_usingDDict() :
+*   Decompression using a pre-digested Dictionary
+*   Use dictionary without significant overhead. */
+size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
+                                  void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize,
+                            const ZSTD_DDict* ddict)
+{
+    /* pass content and size in case legacy frames are encountered */
+    return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize,
+                     NULL, 0,
+                     ddict);
+}
+
+
+/*=====================================
+*   Streaming decompression
+*====================================*/
+
+ZSTD_DStream* ZSTD_createDStream(void)
+{
+    DEBUGLOG(3, "ZSTD_createDStream");
+    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
+}
+
+ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
+{
+    return ZSTD_initStaticDCtx(workspace, workspaceSize);
+}
+
+ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDCtx_internal(customMem);
+}
+
+size_t ZSTD_freeDStream(ZSTD_DStream* zds)
+{
+    return ZSTD_freeDCtx(zds);
+}
+
+
+/* ***   Initialization   *** */
+
+size_t ZSTD_DStreamInSize(void)  { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
+size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
+
+size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
+                                   const void* dict, size_t dictSize,
+                                   ZSTD_dictLoadMethod_e dictLoadMethod,
+                                   ZSTD_dictContentType_e dictContentType)
+{
+    RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, "");
+    ZSTD_clearDict(dctx);
+    if (dict && dictSize != 0) {
+        dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem);
+        RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!");
+        dctx->ddict = dctx->ddictLocal;
+        dctx->dictUses = ZSTD_use_indefinitely;
+    }
+    return 0;
+}
+
+size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
+}
+
+size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
+{
+    return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
+}
+
+size_t
ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(zds, ZSTD_reset_session_only), ""); + FORWARD_IF_ERROR(ZSTD_DCtx_refDDict(zds, NULL), ""); + return ZSTD_startingInputLength(zds->format); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). 
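+ * A typical session around these init/reset entry points looks like this
+ * (illustrative sketch; buffer names are the caller's own) :
+ *     ZSTD_initDStream(zds);
+ *     ZSTD_inBuffer  in  = { inBuf,  inSize,  0 };
+ *     ZSTD_outBuffer out = { outBuf, outSize, 0 };
+ *     while (in.pos < in.size) {
+ *         size_t const hint = ZSTD_decompressStream(zds, &out, &in);
+ *         if (ZSTD_isError(hint)) break;   (flush `out` / refill `in` as needed)
+ *     }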
+ * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + DEBUGLOG(4, "ZSTD_resetDStream"); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; + case ZSTD_d_disableHuffmanAssembly: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + case ZSTD_d_maxBlockSize: + bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN; + bounds.upperBound = ZSTD_BLOCKSIZE_MAX; + return bounds; + + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, 
ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + case ZSTD_d_disableHuffmanAssembly: + *value = (int)dctx->disableHufAsm; + return 0; + case ZSTD_d_maxBlockSize: + *value = dctx->maxBlockSizeParam; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; + case ZSTD_d_disableHuffmanAssembly: + CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value); + dctx->disableHufAsm = value != 0; + return 0; + case ZSTD_d_maxBlockSize: + if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value); + dctx->maxBlockSizeParam = value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + dctx->isFrameDecompression = 1; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} + +static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax) +{ + size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax); + /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block + * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing + * the block at the beginning of the output buffer, and maintain a full window. + * + * We need another blockSize worth of buffer so that we can store split + * literals at the end of the block without overwriting the extDict window. 
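+     * Worked example (assuming the usual WILDCOPY_OVERLENGTH of 32 bytes) :
+     * an 8 MiB window with 128 KiB blocks needs
+     * 8 MiB + 2 * 128 KiB + 2 * 32 B, i.e. about 8.25 MiB of rolling buffer.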
+ */ + unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge, ""); + return minRBSize; +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX); +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + zds->legacyVersion = 0; +#endif + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + ZSTD_FALLTHROUGH; + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? 
ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize), ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + /* check first few bytes */ + FORWARD_IF_ERROR( + ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format), + "First few bytes detected incorrect" ); + /* return hint input size */ + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()"); + assert(istart != NULL); + ip = istart + cSize; + op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */ + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. 
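+             * With ZSTD_d_stableOutBuffer the caller promises to present the very
+             * same buffer, sized for the whole frame, on every call; decompression
+             * then writes into it directly and never bounces through the internal
+             * outBuff, so an undersized buffer must be rejected here, up front.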
*/ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if (zds->format == ZSTD_f_zstd1 + && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + if (zds->maxBlockSizeParam != 0) + zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + ZSTD_FALLTHROUGH; + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + assert(ip != NULL); + ip += neededInSize; + /* Function modifies 
the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + ZSTD_FALLTHROUGH; + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip))); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + if (loadedSize != 0) { + /* ip may be NULL */ + ip += loadedSize; + zds->inPos += loadedSize; + } + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { + size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + + op = op ? op + flushedSize : op; + + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compilers require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. 
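+     * The expectation is recorded unconditionally; ZSTD_checkOutBuffer() only
+     * enforces it on the next call when outBufferMode == ZSTD_bm_stable.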
*/ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, noForwardProgress_destFull, ""); + RETURN_ERROR_IF(ip==iend, noForwardProgress_inputEmpty, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output; + ZSTD_inBuffer input; + output.dst = dst; + output.size = dstCapacity; + output.pos = *dstPos; + input.src = src; + input.size = srcSize; + input.pos = *srcPos; + { size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; + } +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.c b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.c new file mode 100644 index 0000000..76d7332 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.c @@ -0,0 +1,2215 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/compiler.h" /* prefetch */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/huf.h" +#include "../common/zstd_internal.h" +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" +#include "../common/bits.h" /* ZSTD_highbit32 */ + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. + */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx) +{ + size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX; + assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX); + return blockSizeMax; +} + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + +/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */ +static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize, + const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately) +{ + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); + assert(litSize <= blockSizeMax); + assert(dctx->isFrameDecompression || streaming == not_streaming); + assert(expectedWriteSize <= blockSizeMax); + if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) { + /* If we aren't streaming, we can just put the literals after the output + * of the current block. We don't need to worry about overwriting the + * extDict of our window, because it doesn't exist. + * So if we have space after the end of the block, just put it there. 
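+         *
+         * Resulting layout of dst in this case (illustration):
+         *   [ block output, <= blockSizeMax ][ WILDCOPY_OVERLENGTH slack ][ literals ]
+         * so match copies can overshoot the block end without ever touching
+         * the literals.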
+ */ + dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_in_dst; + } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) { + /* Literals fit entirely within the extra buffer, put them there to avoid + * having to split the literals. + */ + dctx->litBuffer = dctx->litExtraBuffer; + dctx->litBufferEnd = dctx->litBuffer + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + } else { + assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE); + /* Literals must be split between the output block and the extra lit + * buffer. We fill the extra lit buffer with the tail of the literals, + * and put the rest of the literals at the end of the block, with + * WILDCOPY_OVERLENGTH of buffer room to allow for overreads. + * This MUST not write more than our maxBlockSize beyond dst, because in + * streaming mode, that could overwrite part of our extDict window. + */ + if (splitImmediately) { + /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */ + dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE; + } else { + /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */ + dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize; + dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize; + } + dctx->litBufferLocation = ZSTD_split; + assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize); + } +} + +/*! ZSTD_decodeLiteralsBlock() : + * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored + * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current + * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being + * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write. + * + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */ + void* dst, size_t dstCapacity, const streaming_operation streaming) +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + size_t const blockSizeMax = ZSTD_blockSizeMax(dctx); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + ZSTD_FALLTHROUGH; + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); + int const flags = 0 + | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0) + | (dctx->disableHufAsm ? 
HUF_flags_disableAsm : 0); + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); + if (!singleStream) + RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong, + "Not enough literals (%zu) for the 4-streams mode (min %u)", + litSize, MIN_LITERALS_FOR_4_STREAMS); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, flags); + } else { + assert(litSize >= MIN_LITERALS_FOR_4_STREAMS); + hufSuccess = HUF_decompress4X_usingDTable( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, flags); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), flags); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), flags); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), flags); + } + } + if (dctx->litBufferLocation == ZSTD_split) + { + assert(litSize > ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE); + dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH; + dctx->litBufferEnd -= WILDCOPY_OVERLENGTH; + assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax); + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= 
MIN_CBLOCK_SIZE == 2; here we need lhSize = 3"); + litSize = MEM_readLE24(istart) >> 4; + break; + } + + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize); + } + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + dctx->litBufferEnd = dctx->litPtr + litSize; + dctx->litBufferLocation = ZSTD_not_in_dst; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity); + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3"); + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4"); + litSize = MEM_readLE24(istart) >> 4; + break; + } + RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, ""); + RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, ""); + ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1); + if (dctx->litBufferLocation == ZSTD_split) + { + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE); + ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE); + } + else + { + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize); + } + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, + void* dst, size_t dstCapacity); +size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx, + const void* src, size_t srcSize, + void* dst, size_t dstCapacity) +{ + dctx->isFrameDecompression = 0; + return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming); +} + +/* Default FSE distribution tables. 
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
+ * They were generated programmatically with following method :
+ * - start from default distributions, present in /lib/common/zstd_internal.h
+ * - generate tables normally, using ZSTD_buildFSETable()
+ * - printout the content of tables
+ * - prettify output, report below, test with fuzzer to ensure it's correct */
+
+/* Default FSE distribution table for Literal Lengths */
+static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
+    /* ... pre-computed table entries omitted ... */
+};
+
+/* Default FSE distribution table for Offset Codes */
+static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
+    /* ... pre-computed table entries omitted ... */
+};
+
+/* Default FSE distribution table for Match Lengths */
+static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
+    /* ... pre-computed table entries omitted ... */
+};
+
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
+{
+    void* ptr = dt;
+    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
+    ZSTD_seqSymbol* const cell = dt + 1;
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->nbBits = 0;
+    cell->nextState = 0;
+    assert(nbAddBits < 255);
+    cell->nbAdditionalBits = nbAddBits;
+    cell->baseValue = baseValue;
+}
+
+
+/* ZSTD_buildFSETable() :
+ * generate FSE decoding table for one symbol (ll, ml or off)
+ * cannot fail if input is valid =>
+ * all inputs are presumed validated at this stage */
+FORCE_INLINE_TEMPLATE
+void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_seqSymbol* const tableDecode = dt+1;
+    U32 const maxSV1 = maxSymbolValue + 1;
+    U32 const tableSize = 1 << tableLog;
+
+    U16* symbolNext = (U16*)wksp;
+    BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
+    U32 highThreshold = tableSize - 1;
+
+
+    /* Sanity Checks */
+    assert(maxSymbolValue <= MaxSeq);
+    assert(tableLog <= MaxFSELog);
+    assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
+    (void)wkspSize;
+    /* Init, lay down lowprob symbols */
+    {   ZSTD_seqSymbol_header DTableH;
+        DTableH.tableLog = tableLog;
+        DTableH.fastMode = 1;
+        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
+            U32 s;
+            for (s=0; s<maxSV1; s++) {
+                if (normalizedCounter[s]==-1) {
+                    tableDecode[highThreshold--].baseValue = s;
+                    symbolNext[s] = 1;
+                } else {
+                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
+                    assert(normalizedCounter[s]>=0);
+                    symbolNext[s] = (U16)normalizedCounter[s];
+        }   }   }
+        ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
+    }
+
+    /* Spread symbols */
+    assert(tableSize <= 512);
+    /* Specialized symbol spreading for the case when there are
+     * no low probability (-1 count) symbols. When compressing
+     * small blocks we avoid low probability symbols to hit this
+     * case, since header decoding speed matters more.
+     */
+    if (highThreshold == tableSize - 1) {
+        size_t const tableMask = tableSize-1;
+        size_t const step = FSE_TABLESTEP(tableSize);
+        /* First lay down the symbols in order.
+         * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
+         * misses since small blocks generally have small table logs, so nearly
+         * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
+         * our buffer to handle the over-write.
+         */
+        {
+            U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                assert(n>=0);
+                pos += (size_t)n;
+            }
+        }
+        /* Now we spread those positions across the table.
+         * The benefit of doing it in two stages is that we avoid the
+         * variable size inner loop, which caused lots of branch misses.
+         * Now we can run through all the positions without any branch misses.
+         * We unroll the loop twice, since that is what empirically worked best.
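+         *
+         * Illustration: for tableLog 6 (tableSize 64), FSE_TABLESTEP gives
+         * step = (64 >> 1) + (64 >> 3) + 3 = 43, which is odd and hence coprime
+         * with 64, so the walk 0, 43, 22, 1, ... visits every cell exactly once
+         * before wrapping back to position 0.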
+         */
+        {
+            size_t position = 0;
+            size_t s;
+            size_t const unroll = 2;
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableDecode[uPosition].baseValue = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);
+        }
+    } else {
+        U32 const tableMask = tableSize-1;
+        U32 const step = FSE_TABLESTEP(tableSize);
+        U32 s, position = 0;
+        for (s=0; s<maxSV1; s++) {
+            int i;
+            int const n = normalizedCounter[s];
+            for (i=0; i<n; i++) {
+                tableDecode[position].baseValue = s;
+                position = (position + step) & tableMask;
+                while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask;   /* lowprob area */
+        }   }
+        assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+    }
+
+    /* Build Decoding table */
+    {
+        U32 u;
+        for (u=0; u<tableSize; u++) {
+            U32 const symbol = tableDecode[u].baseValue;
+            U32 const nextState = symbolNext[symbol]++;
+            tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
+            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
+            assert(nbAdditionalBits[symbol] < 255);
+            tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
+            tableDecode[u].baseValue = baseValue[symbol];
+        }
+    }
+}
+
+/* Avoids the FORCE_INLINE of the _body() function. */
+static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+#if DYNAMIC_BMI2
+BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize)
+{
+    ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+#endif
+
+void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
+            const short* normalizedCounter, unsigned maxSymbolValue,
+            const U32* baseValue, const U8* nbAdditionalBits,
+            unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+        ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
+                baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+        return;
+    }
+#endif
+    (void)bmi2;
+    ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
+            baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
+}
+
+
+/*! ZSTD_buildSeqTable() :
+ * @return : nb bytes read from src,
+ *           or an error code if it fails */
+static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
+                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
+                                 const void* src, size_t srcSize,
+                                 const U32* baseValue, const U8* nbAdditionalBits,
+                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
+                                 int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
+                                 int bmi2)
+{
+    switch(type)
+    {
+    case set_rle :
+        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
+        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
+        {   U32 const symbol = *(const BYTE*)src;
+            U32 const baseline = baseValue[symbol];
+            U8 const nbBits = nbAdditionalBits[symbol];
+            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
+        }
+        *DTablePtr = DTableSpace;
+        return 1;
+    case set_basic :
+        *DTablePtr = defaultTable;
+        return 0;
+    case set_repeat:
+        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
+        /* prefetch FSE table if used */
+        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
+            const void* const pStart = *DTablePtr;
+            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
+            PREFETCH_AREA(pStart, pSize);
+        }
+        return 0;
+    case set_compressed :
+        {   unsigned tableLog;
+            S16 norm[MaxSeq+1];
+            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
+            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
+            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
+            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
+            *DTablePtr = DTableSpace;
+            return headerSize;
+        }
+    default :
+        assert(0);
+        RETURN_ERROR(GENERIC, "impossible");
+    }
+}
+
+size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
+                             const void* src, size_t srcSize)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* ip = istart;
+    int nbSeq;
+    DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
+
+    /* check */
+    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
+
+    /* SeqHead */
+    nbSeq = *ip++;
+    if (nbSeq > 0x7F) {
+        if (nbSeq == 0xFF) {
+            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
+            nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
+            ip+=2;
+        } else {
+            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
+            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
+        }
+    }
+    *nbSeqPtr = nbSeq;
+
+    if (nbSeq == 0) {
+        /* No sequence : section ends immediately */
+        RETURN_ERROR_IF(ip != iend, corruption_detected,
+            "extraneous data present in the Sequences section");
+        return (size_t)(ip - istart);
+    }
+
+    /* FSE table descriptors */
+    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+    RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes.
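+     * Layout of this mode byte, high to low: bits 7-6 Literals_Lengths_Mode,
+     * bits 5-4 Offsets_Mode, bits 3-2 Match_Lengths_Mode, bits 1-0 Reserved
+     * (field names per the zstd format specification).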
+     */
+    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
+        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
+        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
+        ip++;
+
+        /* Build DTables */
+        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
+                                                      LLtype, MaxLL, LLFSELog,
+                                                      ip, iend-ip,
+                                                      LL_base, LL_bits,
+                                                      LL_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      ZSTD_DCtx_get_bmi2(dctx));
+            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += llhSize;
+        }
+
+        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
+                                                      OFtype, MaxOff, OffFSELog,
+                                                      ip, iend-ip,
+                                                      OF_base, OF_bits,
+                                                      OF_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      ZSTD_DCtx_get_bmi2(dctx));
+            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += ofhSize;
+        }
+
+        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
+                                                      MLtype, MaxML, MLFSELog,
+                                                      ip, iend-ip,
+                                                      ML_base, ML_bits,
+                                                      ML_defaultDTable, dctx->fseEntropy,
+                                                      dctx->ddictIsCold, nbSeq,
+                                                      dctx->workspace, sizeof(dctx->workspace),
+                                                      ZSTD_DCtx_get_bmi2(dctx));
+            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
+            ip += mlhSize;
+        }
+    }
+
+    return ip-istart;
+}
+
+
+typedef struct {
+    size_t litLength;
+    size_t matchLength;
+    size_t offset;
+} seq_t;
+
+typedef struct {
+    size_t state;
+    const ZSTD_seqSymbol* table;
+} ZSTD_fseState;
+
+typedef struct {
+    BIT_DStream_t DStream;
+    ZSTD_fseState stateLL;
+    ZSTD_fseState stateOffb;
+    ZSTD_fseState stateML;
+    size_t prevOffset[ZSTD_REP_NUM];
+} seqState_t;
+
+/*! ZSTD_overlapCopy8() :
+ *  Copies 8 bytes from ip to op and updates op and ip where ip <= op.
+ *  If the offset is < 8 then the offset is spread to at least 8 bytes.
+ *
+ *  Precondition: *ip <= *op
+ *  Postcondition: *op - *ip >= 8
+ */
+HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
+    assert(*ip <= *op);
+    if (offset < 8) {
+        /* close range match, overlap */
+        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
+        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
+        int const sub2 = dec64table[offset];
+        (*op)[0] = (*ip)[0];
+        (*op)[1] = (*ip)[1];
+        (*op)[2] = (*ip)[2];
+        (*op)[3] = (*ip)[3];
+        *ip += dec32table[offset];
+        ZSTD_copy4(*op+4, *ip);
+        *ip -= sub2;
+    } else {
+        ZSTD_copy8(*op, *ip);
+    }
+    *ip += 8;
+    *op += 8;
+    assert(*op - *ip >= 8);
+}
+
+/*! ZSTD_safecopy() :
+ *  Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
+ *  and write up to 16 bytes past oend_w (op >= oend_w is allowed).
+ *  This function is only called in the uncommon case where the sequence is near the end of the block. It
+ *  should be fast for a single long sequence, but can be slow for several short sequences.
+ *
+ *  @param ovtype controls the overlap detection
+ *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
+ *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
+ *           The src buffer must be before the dst buffer.
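+ *
+ *  Example (illustrative): a match with offset 1 and length 20 near the end of
+ *  the block has ip == op - 1, so it must use ZSTD_overlap_src_before_dst;
+ *  ZSTD_overlapCopy8() then widens the effective offset to >= 8 before the
+ *  remaining bytes are copied.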
+ */ +static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + length -= 8; + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op += oend_w - op; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_safecopyDstBeforeSrc(): + * This version allows overlap with dst before src, or handles the non-overlap case with dst after src + * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */ +static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + if (length < 8 || diff > -8) { + /* Handle short lengths, close overlaps, and dst not before src. */ + while (op < oend) *op++ = *ip++; + return; + } + + if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) { + ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap); + ip += oend - WILDCOPY_OVERLENGTH - op; + op += oend - WILDCOPY_OVERLENGTH - op; + } + + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. + * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). 
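+ *
+ * For instance, a final sequence whose match ends fewer than
+ * WILDCOPY_OVERLENGTH (32) bytes before oend is routed here, since a wildcopy
+ * at that position could write past the end of the output buffer.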
+ */ +FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart - match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +/* ZSTD_execSequenceEndSplitLitBuffer(): + * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case. 
+ */ +FORCE_NOINLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op, + BYTE* const oend, const BYTE* const oend_w, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer"); + ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart - match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + +#if defined(__aarch64__) + /* prefetch sequence starting from match that will be used for copy later */ + PREFETCH_L1(match); +#endif + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal 
length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } + } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. 
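+     * At this point op - match >= 8, so every 8-byte step of the overlap-aware
+     * wildcopy reads only bytes that were written at least one step earlier.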
*/ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +HINT_INLINE +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op, + BYTE* const oend, const BYTE* const oend_w, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. 
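+     * E.g. for offset 1 (a byte-RLE match) it copies 4 bytes one at a time,
+     * advances the source by dec32table[1] == 1, copies 4 more, then rewinds
+     * the source by dec64table[1] == 8, leaving op - match == 8 afterwards.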
*/ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. */ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits) +{ + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32 + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; + +/** + * ZSTD_decodeSequence(): + * @p longOffsets : tells the decoder to reload more bit while decoding large offsets + * only used in 32-bit mode + * @return : Sequence (litL + matchL + offset) + */ +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq) +{ + seq_t seq; + /* + * ZSTD_seqSymbol is a 64 bits wide structure. + * It can be loaded in one operation + * and its fields extracted by simply shifting or bit-extracting on aarch64. + * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh + * operations that cause performance drop. This can be avoided by using this + * ZSTD_memcpy hack. 
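+     * (A ZSTD_seqSymbol packs nextState:16, nbAdditionalBits:8, nbBits:8 and
+     * baseValue:32 into exactly 8 bytes, which is what makes the single
+     * 64-bit load possible.)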
+ */ +#if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__)) + ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS; + ZSTD_seqSymbol* const llDInfo = &llDInfoS; + ZSTD_seqSymbol* const mlDInfo = &mlDInfoS; + ZSTD_seqSymbol* const ofDInfo = &ofDInfoS; + ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol)); + ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol)); + ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol)); +#else + const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state; + const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state; + const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state; +#endif + seq.matchLength = mlDInfo->baseValue; + seq.litLength = llDInfo->baseValue; + { U32 const ofBase = ofDInfo->baseValue; + BYTE const llBits = llDInfo->nbAdditionalBits; + BYTE const mlBits = mlDInfo->nbAdditionalBits; + BYTE const ofBits = ofDInfo->nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + U16 const llNext = llDInfo->nextState; + U16 const mlNext = mlDInfo->nextState; + U16 const ofNext = ofDInfo->nextState; + U32 const llnbBits = llDInfo->nbBits; + U32 const mlnbBits = mlDInfo->nbBits; + U32 const ofnbBits = ofDInfo->nbBits; + + assert(llBits <= MaxLLBits); + assert(mlBits <= MaxMLBits); + assert(ofBits <= MaxOff); + /* + * As gcc has better branch and block analyzers, sometimes it is only + * valuable to mark likeliness for clang, it gives around 3-4% of + * performance. + */ + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32); + ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + /* Always read extra bits, this keeps the logic simple, + * avoids branches, and avoids accidentally reading 0 bits. + */ + U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32; + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + offset += BIT_readBitsFast(&seqState->DStream, extraBits); + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llDInfo->baseValue == 0); + if (LIKELY((ofBits == 0))) { + offset = seqState->prevOffset[ll0]; + seqState->prevOffset[1] = seqState->prevOffset[!ll0]; + seqState->prevOffset[0] = offset; + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? 
seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + if (!isLastSeq) { + /* don't update FSE state for last Sequence */ + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */ + BIT_reloadDStream(&seqState->DStream); + } + } + + return seq; +} + +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) +#if DEBUGLEVEL >= 1 +static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} +#endif + +static void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + if (dctx->isFrameDecompression) { + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_blockSizeMax(dctx)); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/ + assert(seq.offset <= windowSize); + } + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + + +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize); + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* litBufferEnd = dctx->litBufferEnd; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq); + + /* Literals are split between internal buffer & output buffer */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + + /* decompress without overrunning litPtr begins */ + { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */ + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * This issue has been reproduced on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. 
+ * - Coffeelake: Intel i9-9900k + * - Coffeelake: Intel i7-9700k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * Alignment is done for each of the three major decompression loops: + * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer + * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer + * - ZSTD_decompressSequences_body + * Alignment choices are made to minimize large swings on bad cases and influence on performance + * from changes external to this code, rather than to overoptimize on the current commit. + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. + * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ +#if defined(__GNUC__) && defined(__x86_64__) + __asm__(".p2align 6"); +# if __GNUC__ >= 7 + /* good for gcc-7, gcc-9, and gcc-11 */ + __asm__("nop"); + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +# if __GNUC__ == 8 || __GNUC__ == 10 + /* good for gcc-8 and gcc-10 */ + __asm__("nop"); + __asm__(".p2align 3"); +# endif +# endif +#endif + + /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */ + for ( ; nbSeq; nbSeq--) { + sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); + if (litPtr + sequence.litLength > dctx->litBufferEnd) break; + { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + } } + DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)"); + + /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */ + if (nbSeq > 0) { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength); + if (leftoverLit) { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence.litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + } + nbSeq--; + } + } + + if (nbSeq > 0) { + /* there is remaining lit from extra buffer */ + +#if 
defined(__GNUC__) && defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); +# if __GNUC__ != 7 + /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */ + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); +# elif __GNUC__ >= 11 + __asm__(".p2align 3"); +# else + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); +# endif +#endif + + for ( ; nbSeq ; nbSeq--) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + } + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq); + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + if (dctx->litBufferLocation == ZSTD_split) { + /* split hasn't been reached yet, first get dst then copy litExtraBuffer */ + size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize); + RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + } + /* copy last literals from internal buffer */ + { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr); + DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize); + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } + + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); + return (size_t)(op - ostart); +} + +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? 
ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart); + const BYTE* const vBase = (const BYTE*)(dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq); + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + dctx->fseEntropy = 1; + { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + +#if defined(__GNUC__) && defined(__x86_64__) + __asm__(".p2align 6"); + __asm__("nop"); +# if __GNUC__ >= 7 + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 3"); +# else + __asm__(".p2align 4"); + __asm__("nop"); + __asm__(".p2align 3"); +# endif +#endif + + for ( ; nbSeq ; nbSeq--) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + } + + /* check if reached exact end */ + assert(nbSeq == 0); + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = (size_t)(litEnd - litPtr); + DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize); + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } + + DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart)); + return (size_t)(op - ostart); +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} + +static size_t +ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + +FORCE_INLINE_TEMPLATE + +size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, + const BYTE* const prefixStart, const BYTE* const dictEnd) +{ + prefetchPos += sequence.litLength; + { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? 
dictEnd : prefixStart; + /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : memory address is only used for prefetching, not for dereferencing */ + const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset); + PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + } + return prefetchPos + sequence.matchLength; +} + +/* This decoding function employs prefetching + * to reduce latency impact of cache misses. + * It's generally employed when block contains a significant portion of long-distance matches + * or when coupled with a "cold" dictionary */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize); + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* litBufferEnd = dctx->litBufferEnd; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 8 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS STORED_SEQS + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */ + + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; seqNblitBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) { + /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */ + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) + { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, 
dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } } + else + { + /* lit buffer is either wholly contained in first or second split, or not split at all*/ + size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? + ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : + ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + + prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd); + sequences[seqNb & STORED_SEQS_MASK] = sequence; + op += oneSeqSize; + } + } + RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, ""); + + /* finish queue */ + seqNb -= seqAdvance; + for ( ; seqNblitBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) { + const size_t leftoverLit = dctx->litBufferEnd - litPtr; + if (leftoverLit) { + RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer"); + ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit); + sequence->litLength -= leftoverLit; + op += leftoverLit; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + dctx->litBufferLocation = ZSTD_not_in_dst; + { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + } + else + { + size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ? 
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) : + ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart); +#endif + if (ZSTD_isError(oneSeqSize)) return oneSeqSize; + op += oneSeqSize; + } + } + + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */ + size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + litPtr = dctx->litExtraBuffer; + litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE; + } + { size_t const lastLLSize = litBufferEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memmove(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return (size_t)(op - ostart); +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static BMI2_TARGET_ATTRIBUTE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +static BMI2_TARGET_ATTRIBUTE size_t +DONT_VECTORIZE +ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static BMI2_TARGET_ATTRIBUTE size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) 
{ + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +static size_t +ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer"); +#if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) { + return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (ZSTD_DCtx_get_bmi2(dctx)) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + +/** + * @returns The total size of the history referenceable by zstd, including + * both the prefix and the extDict. At @p op any offset larger than this + * is invalid. + */ +static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart) +{ + return (size_t)(op - virtualStart); +} + +typedef struct { + unsigned longOffsetShare; + unsigned maxNbAdditionalBits; +} ZSTD_OffsetInfo; + +/* ZSTD_getOffsetInfo() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) info.longOffsetShare += 1; + } + + assert(tableLog <= OffFSELog); + info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + } + + return info; +} + +/** + * @returns The maximum offset we can decode in one read of our bitstream, without + * reloading more bits in the middle of the offset bits read. Any offsets larger + * than this must use the long offset decoder. + */ +static size_t ZSTD_maxShortOffset(void) +{ + if (MEM_64bits()) { + /* We can decode any offset without reloading bits. + * This might change if the max window size grows. + */ + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + return (size_t)-1; + } else { + /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1. + * This offBase would require STREAM_ACCUMULATOR_MIN extra bits. + * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset. 
+ */ + size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1; + size_t const maxOffset = maxOffbase - ZSTD_REP_NUM; + assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN); + return maxOffset; + } +} + +size_t +ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const streaming_operation streaming) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize); + + /* Note : the wording of the specification + * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx). + * This generally does not happen, as it makes little sense, + * since an uncompressed block would feature same size and have no decompression cost. + * Also, note that decoder from reference libzstd before < v1.5.4 + * would consider this edge case as an error. + * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx) + * for broader compatibility with the deployed ecosystem of zstd decoders */ + RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, ""); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* Compute the maximum block size, which must also work when !frame and fParams are unset. + * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t. + */ + size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx)); + size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart); + /* isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than ZSTD_maxShortOffset(). + * We don't expect that to be the case in 64-bit mode. + * + * We check here to see if our history is large enough to allow long offsets. + * If it isn't, then we can't possible have (valid) long offsets. If the offset + * is invalid, then it is okay to read it incorrectly. + * + * If isLongOffsets is true, then we will later check our decoding table to see + * if it is even possible to generate long offsets. + */ + ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset())); + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#else + /* Set to 1 to avoid computing offset info if we don't need to. + * Otherwise this value is ignored. 
+ */ + int usePrefetchDecoder = 1; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall, + "invalid dst"); + + /* If we could potentially have long offsets, or we might want to use the prefetch decoder, + * compute information about the share of long offsets, and the maximum nbAdditionalBits. + * NOTE: could probably use a larger nbSeq limit + */ + if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) { + ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq); + if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) { + /* If isLongOffset, but the maximum number of additional bits that we see in our table is small + * enough, then we know it is impossible to have too long an offset in this block, so we can + * use the regular offset decoder. + */ + isLongOffset = ZSTD_lo_isRegularOffset; + } + if (!usePrefetchDecoder) { + U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (info.longOffsetShare >= minShare); + } + } + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) { +#else + (void)usePrefetchDecoder; + { +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); +#endif + } + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + if (dctx->litBufferLocation == ZSTD_split) + return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + else + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); +#endif + } +} + + +ZSTD_ALLOW_POINTER_OVERFLOW_ATTR +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) +{ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + dctx->isFrameDecompression = 0; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming); + FORWARD_IF_ERROR(dSize, ""); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} + + +/* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */ +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize); +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.h b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.h new file mode 100644 index 0000000..ab15240 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_block.h @@ -0,0 +1,73 @@ +/* + * 
Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/zstd_deps.h" /* size_t */ +#include "../zstd.h" /* DCtx, and some public functions */ +#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */ +#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + + /* Streaming state is used to inform allocation of the literal buffer */ +typedef enum { + not_streaming = 0, + is_streaming = 1 +} streaming_operation; + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const streaming_operation streaming); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. + */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U8* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + +/* Internal definition of ZSTD_decompressBlock() to avoid deprecation warnings. */ +size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +#endif /* ZSTD_DEC_BLOCK_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_internal.h b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_internal.h new file mode 100644 index 0000000..83a7a01 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/decompress/zstd_decompress_internal.h @@ -0,0 +1,240 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/mem.h" /* BYTE, U16, U32 */ +#include "../common/zstd_internal.h" /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) +#define ZSTD_HUFFDTABLE_CAPACITY_LOG 12 + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(ZSTD_HUFFDTABLE_CAPACITY_LOG)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing 
references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + +#ifndef ZSTD_DECODER_INTERNAL_BUFFER +# define ZSTD_DECODER_INTERNAL_BUFFER (1 << 16) +#endif + +#define ZSTD_LBMIN 64 +#define ZSTD_LBMAX (128 << 10) + +/* extra buffer, compensates when dst is not large enough to store litBuffer */ +#define ZSTD_LITBUFFEREXTRASIZE BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX) + +typedef enum { + ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */ + ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */ + ZSTD_split = 2 /* Split between litExtraBuffer and dst */ +} ZSTD_litLocation_e; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int isFrameDecompression; +#if DYNAMIC_BMI2 != 0 + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ +#endif + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + int disableHufAsm; + int maxBlockSizeParam; + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; +#endif + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE* litBuffer; + const BYTE* litBufferEnd; + ZSTD_litLocation_e litBufferLocation; + BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */ + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + +MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) { +#if DYNAMIC_BMI2 != 0 + return dctx->bmi2; +#else + (void)dctx; + return 0; +#endif +} + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff.h b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff.h new file mode 100644 index 0000000..a968245 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff.h @@ -0,0 +1,214 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* *************************************************************** +* NOTES/WARNINGS +******************************************************************/ +/* The streaming API defined here is deprecated. + * Consider migrating towards ZSTD_compressStream() API in `zstd.h` + * See 'lib/README.md'. 
+ *****************************************************************/ + + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_BUFFERED_H_23987 +#define ZSTD_BUFFERED_H_23987 + +/* ************************************* +* Dependencies +***************************************/ +#include /* size_t */ +#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */ + + +/* *************************************************************** +* Compiler specifics +*****************************************************************/ +/* Deprecation warnings */ +/* Should these warnings be a problem, + * it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc + * or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZBUFF_DISABLE_DEPRECATE_WARNINGS + */ +#ifdef ZBUFF_DISABLE_DEPRECATE_WARNINGS +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZBUFF_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API +# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ >= 3) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZBUFF_DEPRECATED for this compiler") +# define ZBUFF_DEPRECATED(message) ZSTDLIB_API +# endif +#endif /* ZBUFF_DISABLE_DEPRECATE_WARNINGS */ + + +/* ************************************* +* Streaming functions +***************************************/ +/* This is the easier "buffered" streaming API, +* using an internal buffer to lift all restrictions on user-provided buffers +* which can be any size, any place, for both input and output. +* ZBUFF and ZSTD are 100% interoperable, +* frames created by one can be decoded by the other one */ + +typedef ZSTD_CStream ZBUFF_CCtx; +ZBUFF_DEPRECATED("use ZSTD_createCStream") ZBUFF_CCtx* ZBUFF_createCCtx(void); +ZBUFF_DEPRECATED("use ZSTD_freeCStream") size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx); + +ZBUFF_DEPRECATED("use ZSTD_initCStream") size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel); +ZBUFF_DEPRECATED("use ZSTD_initCStream_usingDict") size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); + +ZBUFF_DEPRECATED("use ZSTD_compressStream") size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr); +ZBUFF_DEPRECATED("use ZSTD_flushStream") size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); +ZBUFF_DEPRECATED("use ZSTD_endStream") size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr); + +/*-************************************************* +* Streaming compression - howto +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Start by initializing ZBUF_CCtx. +* Use ZBUFF_compressInit() to start a new compression operation. +* Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary. 
+* +* Use ZBUFF_compressContinue() repetitively to consume input stream. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present again remaining data. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters or change @dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush(). +* The nb of bytes written into `dst` will be reported into *dstCapacityPtr. +* Note that the function cannot output more than *dstCapacityPtr, +* therefore, some content might still be left into internal buffer if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* In which case, call again ZBUFF_compressFlush() to complete the flush. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize() +* input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency) +* output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering. +* By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering. +* **************************************************/ + + +typedef ZSTD_DStream ZBUFF_DCtx; +ZBUFF_DEPRECATED("use ZSTD_createDStream") ZBUFF_DCtx* ZBUFF_createDCtx(void); +ZBUFF_DEPRECATED("use ZSTD_freeDStream") size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx); + +ZBUFF_DEPRECATED("use ZSTD_initDStream") size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx); +ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize); + +ZBUFF_DEPRECATED("use ZSTD_decompressStream") size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFF_DCtx object is required to track streaming operations. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation, +* or ZBUFF_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFF_DCtx objects can be re-init multiple times. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. 
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : 0 when a frame is completely decoded and fully flushed, +* 1 when there is still some data left within internal buffer to flush, +* >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency), +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize() +* output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFF_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZBUFF_DEPRECATED("use ZSTD_isError") unsigned ZBUFF_isError(size_t errorCode); +ZBUFF_DEPRECATED("use ZSTD_getErrorName") const char* ZBUFF_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZBUFF_DEPRECATED("use ZSTD_CStreamInSize") size_t ZBUFF_recommendedCInSize(void); +ZBUFF_DEPRECATED("use ZSTD_CStreamOutSize") size_t ZBUFF_recommendedCOutSize(void); +ZBUFF_DEPRECATED("use ZSTD_DStreamInSize") size_t ZBUFF_recommendedDInSize(void); +ZBUFF_DEPRECATED("use ZSTD_DStreamOutSize") size_t ZBUFF_recommendedDOutSize(void); + +#endif /* ZSTD_BUFFERED_H_23987 */ + + +#ifdef ZBUFF_STATIC_LINKING_ONLY +#ifndef ZBUFF_STATIC_H_30298098432 +#define ZBUFF_STATIC_H_30298098432 + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used in association with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +/*--- Dependency ---*/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */ +#include "../zstd.h" + + +/*--- Custom memory allocator ---*/ +/*! ZBUFF_createCCtx_advanced() : + * Create a ZBUFF compression context using external alloc and free functions */ +ZBUFF_DEPRECATED("use ZSTD_createCStream_advanced") ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem); + +/*! 
ZBUFF_createDCtx_advanced() : + * Create a ZBUFF decompression context using external alloc and free functions */ +ZBUFF_DEPRECATED("use ZSTD_createDStream_advanced") ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem); + + +/*--- Advanced Streaming Initialization ---*/ +ZBUFF_DEPRECATED("use ZSTD_initDStream_usingDict") size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize); + + +#endif /* ZBUFF_STATIC_H_30298098432 */ +#endif /* ZBUFF_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif diff --git a/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_common.c b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_common.c new file mode 100644 index 0000000..5a2f2db --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_common.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/error_private.h" +#include "zbuff.h" + +/*-**************************************** +* ZBUFF Error Management (deprecated) +******************************************/ + +/*! ZBUFF_isError() : +* tells if a return value is an error code */ +unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } +/*! ZBUFF_getErrorName() : +* provides error code string from function result (useful for debugging) */ +const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } diff --git a/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_compress.c b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_compress.c new file mode 100644 index 0000000..1d86821 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_compress.c @@ -0,0 +1,167 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/* ************************************* +* Dependencies +***************************************/ +#define ZBUFF_STATIC_LINKING_ONLY +#include "zbuff.h" +#include "../common/error_private.h" + + +/*-*********************************************************** +* Streaming compression +* +* A ZBUFF_CCtx object is required to track streaming operation. +* Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources. +* Use ZBUFF_compressInit() to start a new compression operation. +* ZBUFF_CCtx objects can be reused multiple times. +* +* Use ZBUFF_compressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to call again the function with remaining input. 
+* The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change dst . +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer. +* Note that it will not output more than *dstCapacityPtr. +* Therefore, some content might still be left into its internal buffer if dst buffer is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* ZBUFF_compressEnd() instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small. +* @return : nb of bytes still present into internal buffer (0 if it's empty) +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) +* input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value. +* output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed. +* ***********************************************************/ + +ZBUFF_CCtx* ZBUFF_createCCtx(void) +{ + return ZSTD_createCStream(); +} + +ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createCStream_advanced(customMem); +} + +size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc) +{ + return ZSTD_freeCStream(zbc); +} + + +/* ====== Initialization ====== */ + +size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* preserve "0 == unknown" behavior */ + FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, ZSTD_reset_session_only), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setPledgedSrcSize(zbc, pledgedSrcSize), ""); + + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_windowLog, params.cParams.windowLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_hashLog, params.cParams.hashLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_chainLog, params.cParams.chainLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_searchLog, params.cParams.searchLog), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_minMatch, params.cParams.minMatch), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_targetLength, params.cParams.targetLength), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_strategy, params.cParams.strategy), ""); + + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_contentSizeFlag, params.fParams.contentSizeFlag), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_checksumFlag, params.fParams.checksumFlag), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_dictIDFlag, params.fParams.noDictIDFlag), ""); + + FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), ""); + return 0; +} + +size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel) +{ + FORWARD_IF_ERROR(ZSTD_CCtx_reset(zbc, 
ZSTD_reset_session_only), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_setParameter(zbc, ZSTD_c_compressionLevel, compressionLevel), ""); + FORWARD_IF_ERROR(ZSTD_CCtx_loadDictionary(zbc, dict, dictSize), ""); + return 0; +} + +size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel) +{ + return ZSTD_initCStream(zbc, compressionLevel); +} + +/* ====== Compression ====== */ + + +size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + ZSTD_inBuffer inBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + inBuff.src = src; + inBuff.pos = 0; + inBuff.size = *srcSizePtr; + result = ZSTD_compressStream(zbc, &outBuff, &inBuff); + *dstCapacityPtr = outBuff.pos; + *srcSizePtr = inBuff.pos; + return result; +} + + + +/* ====== Finalize ====== */ + +size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + result = ZSTD_flushStream(zbc, &outBuff); + *dstCapacityPtr = outBuff.pos; + return result; +} + + +size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr) +{ + size_t result; + ZSTD_outBuffer outBuff; + outBuff.dst = dst; + outBuff.pos = 0; + outBuff.size = *dstCapacityPtr; + result = ZSTD_endStream(zbc, &outBuff); + *dstCapacityPtr = outBuff.pos; + return result; +} + + + +/* ************************************* +* Tool functions +***************************************/ +size_t ZBUFF_recommendedCInSize(void) { return ZSTD_CStreamInSize(); } +size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_CStreamOutSize(); } diff --git a/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_decompress.c b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_decompress.c new file mode 100644 index 0000000..12a66af --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/deprecated/zbuff_decompress.c @@ -0,0 +1,77 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+
+
+/* *************************************
+*  Dependencies
+***************************************/
+#define ZSTD_DISABLE_DEPRECATE_WARNINGS /* suppress warning on ZSTD_initDStream_usingDict */
+#include "../zstd.h" /* ZSTD_CStream, ZSTD_DStream, ZSTDLIB_API */
+#define ZBUFF_STATIC_LINKING_ONLY
+#include "zbuff.h"
+
+
+ZBUFF_DCtx* ZBUFF_createDCtx(void)
+{
+    return ZSTD_createDStream();
+}
+
+ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDStream_advanced(customMem);
+}
+
+size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
+{
+    return ZSTD_freeDStream(zbd);
+}
+
+
+/* *** Initialization *** */
+
+size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
+{
+    return ZSTD_initDStream_usingDict(zbd, dict, dictSize);
+}
+
+size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
+{
+    return ZSTD_initDStream(zbd);
+}
+
+
+/* *** Decompression *** */
+
+size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
+                                void* dst, size_t* dstCapacityPtr,
+                                const void* src, size_t* srcSizePtr)
+{
+    ZSTD_outBuffer outBuff;
+    ZSTD_inBuffer inBuff;
+    size_t result;
+    outBuff.dst  = dst;
+    outBuff.pos  = 0;
+    outBuff.size = *dstCapacityPtr;
+    inBuff.src  = src;
+    inBuff.pos  = 0;
+    inBuff.size = *srcSizePtr;
+    result = ZSTD_decompressStream(zbd, &outBuff, &inBuff);
+    *dstCapacityPtr = outBuff.pos;
+    *srcSizePtr = inBuff.pos;
+    return result;
+}
+
+
+/* *************************************
+*  Tool functions
+***************************************/
+size_t ZBUFF_recommendedDInSize(void)  { return ZSTD_DStreamInSize(); }
+size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_DStreamOutSize(); }
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.c b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.c
new file mode 100644
index 0000000..44f9029
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.c
@@ -0,0 +1,1261 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/* *****************************************************************************
+ * Constructs a dictionary using a heuristic based on the following paper:
+ *
+ * Liao, Petri, Moffat, Wirth
+ * Effective Construction of Relative Lempel-Ziv Dictionaries
+ * Published in WWW 2016.
+ *
+ * Adapted from code originally written by @ot (Giuseppe Ottaviano).
+ ******************************************************************************/
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+
+#include "../common/mem.h" /* read */
+#include "../common/pool.h" /* POOL_ctx */
+#include "../common/threading.h" /* ZSTD_pthread_mutex_t */
+#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/bits.h" /* ZSTD_highbit32 */
+#include "../zdict.h"
+#include "cover.h"
+
+/*-*************************************
+* Constants
+***************************************/
+/**
+* There are 32bit indexes used to ref samples, so limit samples size to 4GB
+* on 64bit builds.
+* For 32bit builds we choose 1 GB. +* Most 32bit platforms have 2GB user-mode addressable space and we allocate a large +* contiguous buffer, so 1GB is already a high limit. +*/ +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) +#define COVER_DEFAULT_SPLITPOINT 1.0 + +/*-************************************* +* Console display +***************************************/ +#ifndef LOCALDISPLAYLEVEL +static int g_displayLevel = 0; +#endif +#undef DISPLAY +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#undef LOCALDISPLAYLEVEL +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#undef DISPLAYUPDATE +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + +/*-************************************* +* Hash table +*************************************** +* A small specialized hash map for storing activeDmers. +* The map does not resize, so if it becomes full it will loop forever. +* Thus, the map must be large enough to store every value. +* The map implements linear probing and keeps its load less than 0.5. +*/ + +#define MAP_EMPTY_VALUE ((U32)-1) +typedef struct COVER_map_pair_t_s { + U32 key; + U32 value; +} COVER_map_pair_t; + +typedef struct COVER_map_s { + COVER_map_pair_t *data; + U32 sizeLog; + U32 size; + U32 sizeMask; +} COVER_map_t; + +/** + * Clear the map. + */ +static void COVER_map_clear(COVER_map_t *map) { + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); +} + +/** + * Initializes a map of the given size. + * Returns 1 on success and 0 on failure. + * The map must be destroyed with COVER_map_destroy(). + * The map is only guaranteed to be large enough to hold size elements. + */ +static int COVER_map_init(COVER_map_t *map, U32 size) { + map->sizeLog = ZSTD_highbit32(size) + 2; + map->size = (U32)1 << map->sizeLog; + map->sizeMask = map->size - 1; + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); + if (!map->data) { + map->sizeLog = 0; + map->size = 0; + return 0; + } + COVER_map_clear(map); + return 1; +} + +/** + * Internal hash function + */ +static const U32 COVER_prime4bytes = 2654435761U; +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { + return (key * COVER_prime4bytes) >> (32 - map->sizeLog); +} + +/** + * Helper function that returns the index that a key should be placed into. + */ +static U32 COVER_map_index(COVER_map_t *map, U32 key) { + const U32 hash = COVER_map_hash(map, key); + U32 i; + for (i = hash;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *pos = &map->data[i]; + if (pos->value == MAP_EMPTY_VALUE) { + return i; + } + if (pos->key == key) { + return i; + } + } +} + +/** + * Returns the pointer to the value for key. + * If key is not in the map, it is inserted and the value is set to 0. + * The map must not be full. 
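+ *
+ * Illustrative counting idiom (editorial sketch mirroring the usage in
+ * COVER_selectSegment() below) :
+ *
+ *     U32 *occ = COVER_map_at(&activeDmers, dmerId);
+ *     if (*occ == 0) { ... dmer not yet present in this segment ... }
+ *     *occ += 1;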
+ */ +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; + if (pos->value == MAP_EMPTY_VALUE) { + pos->key = key; + pos->value = 0; + } + return &pos->value; +} + +/** + * Deletes key from the map if present. + */ +static void COVER_map_remove(COVER_map_t *map, U32 key) { + U32 i = COVER_map_index(map, key); + COVER_map_pair_t *del = &map->data[i]; + U32 shift = 1; + if (del->value == MAP_EMPTY_VALUE) { + return; + } + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *const pos = &map->data[i]; + /* If the position is empty we are done */ + if (pos->value == MAP_EMPTY_VALUE) { + del->value = MAP_EMPTY_VALUE; + return; + } + /* If pos can be moved to del do so */ + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { + del->key = pos->key; + del->value = pos->value; + del = pos; + shift = 1; + } else { + ++shift; + } + } +} + +/** + * Destroys a map that is inited with COVER_map_init(). + */ +static void COVER_map_destroy(COVER_map_t *map) { + if (map->data) { + free(map->data); + } + map->data = NULL; + map->size = 0; +} + +/*-************************************* +* Context +***************************************/ + +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + U32 *suffix; + size_t suffixSize; + U32 *freqs; + U32 *dmerAt; + unsigned d; +} COVER_ctx_t; + +/* We need a global context for qsort... */ +static COVER_ctx_t *g_coverCtx = NULL; + +/*-************************************* +* Helper functions +***************************************/ + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + unsigned i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + +/** + * Returns -1 if the dmer at lp is less than the dmer at rp. + * Return 0 if the dmers at lp and rp are equal. + * Returns 1 if the dmer at lp is greater than the dmer at rp. + */ +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); +} +/** + * Faster version for d <= 8. + */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} + +/** + * Same as COVER_cmp() except ties are broken by pointer value + * NOTE: g_coverCtx must be set to call this function. A global is required because + * qsort doesn't take an opaque pointer. + */ +static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} +/** + * Faster version for d <= 8. + */ +static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} + +/** + * Returns the first pointer in [first, last) whose element does not compare + * less than value. If no such element exists it returns last. 
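+ *
+ * Worked example (editorial illustration): over the elements {3, 7, 7, 10},
+ * value 7 yields a pointer to the first 7, value 8 a pointer to 10, and
+ * value 11 yields `last`.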
+ */
+static const size_t *COVER_lower_bound(const size_t* first, const size_t* last,
+                                       size_t value) {
+  size_t count = (size_t)(last - first);
+  assert(last >= first);
+  while (count != 0) {
+    size_t step = count / 2;
+    const size_t *ptr = first;
+    ptr += step;
+    if (*ptr < value) {
+      first = ++ptr;
+      count -= step + 1;
+    } else {
+      count = step;
+    }
+  }
+  return first;
+}
+
+/**
+ * Generic groupBy function.
+ * Groups an array sorted by cmp into groups with equivalent values.
+ * Calls grp for each group.
+ */
+static void
+COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx,
+              int (*cmp)(COVER_ctx_t *, const void *, const void *),
+              void (*grp)(COVER_ctx_t *, const void *, const void *)) {
+  const BYTE *ptr = (const BYTE *)data;
+  size_t num = 0;
+  while (num < count) {
+    const BYTE *grpEnd = ptr + size;
+    ++num;
+    while (num < count && cmp(ctx, ptr, grpEnd) == 0) {
+      grpEnd += size;
+      ++num;
+    }
+    grp(ctx, ptr, grpEnd);
+    ptr = grpEnd;
+  }
+}
+
+/*-*************************************
+* Cover functions
+***************************************/
+
+/**
+ * Called on each group of positions with the same dmer.
+ * Counts the frequency of each dmer and saves it in the suffix array.
+ * Fills `ctx->dmerAt`.
+ */
+static void COVER_group(COVER_ctx_t *ctx, const void *group,
+                        const void *groupEnd) {
+  /* The group consists of all the positions with the same first d bytes. */
+  const U32 *grpPtr = (const U32 *)group;
+  const U32 *grpEnd = (const U32 *)groupEnd;
+  /* The dmerId is how we will reference this dmer.
+   * This allows us to map the whole dmer space to a much smaller space, the
+   * size of the suffix array.
+   */
+  const U32 dmerId = (U32)(grpPtr - ctx->suffix);
+  /* Count the number of samples this dmer shows up in */
+  U32 freq = 0;
+  /* Details */
+  const size_t *curOffsetPtr = ctx->offsets;
+  const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples;
+  /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a
+   * different sample than the last.
+   */
+  size_t curSampleEnd = ctx->offsets[0];
+  for (; grpPtr != grpEnd; ++grpPtr) {
+    /* Save the dmerId for this position so we can get back to it. */
+    ctx->dmerAt[*grpPtr] = dmerId;
+    /* Dictionaries only help for the first reference to the dmer.
+     * After that zstd can reference the match from the previous reference.
+     * So only count each dmer once for each sample it is in.
+     */
+    if (*grpPtr < curSampleEnd) {
+      continue;
+    }
+    freq += 1;
+    /* Binary search to find the end of the sample *grpPtr is in.
+     * In the common case that grpPtr + 1 == grpEnd we can skip the binary
+     * search because the loop is over.
+     */
+    if (grpPtr + 1 != grpEnd) {
+      const size_t *sampleEndPtr =
+          COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr);
+      curSampleEnd = *sampleEndPtr;
+      curOffsetPtr = sampleEndPtr + 1;
+    }
+  }
+  /* At this point we are never going to look at this segment of the suffix
+   * array again. We take advantage of this fact to save memory.
+   * We store the frequency of the dmer in the first position of the group,
+   * which is dmerId.
+   */
+  ctx->suffix[dmerId] = freq;
+}
+
+
+/**
+ * Selects the best segment in an epoch.
+ * Segments are scored according to the function:
+ *
+ * Let F(d) be the frequency of dmer d.
+ * Let S_i be the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer d is in the dictionary we set F(d) = 0.
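+ *
+ * Worked example (editorial illustration, not from the paper): with d = 2 and
+ * k = 4, the segment "abcd" contains the k-d+1 = 3 dmers "ab", "bc", "cd", so
+ *
+ *     Score("abcd") = F("ab") + F("bc") + F("cd")
+ *
+ * and after a winning segment is emitted every dmer it covers has its
+ * frequency zeroed, so later segments earn no credit for covering it again.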
+ */ +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, U32 begin, + U32 end, + ZDICT_cover_params_t parameters) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 dmersInK = k - d + 1; + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + /* Reset the activeDmers in the segment */ + COVER_map_clear(activeDmers); + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. + */ + while (activeSegment.end < end) { + /* The dmerId for the dmer at the next position */ + U32 newDmer = ctx->dmerAt[activeSegment.end]; + /* The entry in activeDmers for this dmerId */ + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); + /* If the dmer isn't already present in the segment add its score. */ + if (*newDmerOcc == 0) { + /* The paper suggest using the L-0.5 norm, but experiments show that it + * doesn't help. + */ + activeSegment.score += freqs[newDmer]; + } + /* Add the dmer to the segment */ + activeSegment.end += 1; + *newDmerOcc += 1; + + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + U32 delDmer = ctx->dmerAt[activeSegment.begin]; + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); + activeSegment.begin += 1; + *delDmerOcc -= 1; + /* If this is the last occurrence of the dmer, subtract its score */ + if (*delDmerOcc == 0) { + COVER_map_remove(activeDmers, delDmer); + activeSegment.score -= freqs[delDmer]; + } + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + { + /* Trim off the zero frequency head and tail from the segment. */ + U32 newBegin = bestSegment.end; + U32 newEnd = bestSegment.begin; + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + U32 freq = freqs[ctx->dmerAt[pos]]; + if (freq != 0) { + newBegin = MIN(newBegin, pos); + newEnd = pos + 1; + } + } + bestSegment.begin = newBegin; + bestSegment.end = newEnd; + } + { + /* Zero out the frequency of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + freqs[ctx->dmerAt[pos]] = 0; + } + } + return bestSegment; +} + +/** + * Check the validity of the parameters. + * Returns non-zero if the parameters are valid and 0 otherwise. + */ +static int COVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize) { + /* k and d are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){ + return 0; + } + return 1; +} + +/** + * Clean up a context initialized with `COVER_ctx_init()`. 
+ */ +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { + if (!ctx) { + return; + } + if (ctx->suffix) { + free(ctx->suffix); + ctx->suffix = NULL; + } + if (ctx->freqs) { + free(ctx->freqs); + ctx->freqs = NULL; + } + if (ctx->dmerAt) { + free(ctx->dmerAt); + ctx->dmerAt = NULL; + } + if (ctx->offsets) { + free(ctx->offsets); + ctx->offsets = NULL; + } +} + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can be used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `COVER_ctx_destroy()`. + */ +static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint) +{ + const BYTE *const samples = (const BYTE *)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); + return ERROR(srcSize_wrong); + } + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); + return ERROR(srcSize_wrong); + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + /* Partial suffix array */ + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* Maps index to the dmerID */ + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + COVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + ctx->freqs = NULL; + ctx->d = d; + + /* Fill offsets from the samplesSizes */ + { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + DISPLAYLEVEL(2, "Constructing partial suffix array\n"); + { + /* suffix is a partial suffix array. + * It only sorts suffixes by their first parameters.d bytes. 
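+ * (Editorial example: with d = 2 over "banana" the dmers are "ba","an","na",
+ * "an","na" at positions 0..4, so the two "an" positions 1 and 3, and the
+ * two "na" positions 2 and 4, end up adjacent after the sort.)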
+ * The sort is stable, so each dmer group is sorted by position in input. + */ + U32 i; + for (i = 0; i < ctx->suffixSize; ++i) { + ctx->suffix[i] = i; + } + /* qsort doesn't take an opaque pointer, so pass as a global. + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. + */ + g_coverCtx = ctx; +#if defined(__OpenBSD__) + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#else + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#endif + } + DISPLAYLEVEL(2, "Computing frequencies\n"); + /* For each dmer group (group of positions with the same first d bytes): + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is + * (groupBeginPtr - suffix). This allows us to go from position to + * dmerID so we can look up values in freq. + * 2. We calculate how many samples the dmer occurs in and save it in + * freqs[dmerId]. + */ + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); + ctx->freqs = ctx->suffix; + ctx->suffix = NULL; + return 0; +} + +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) +{ + const double ratio = (double)nbDmers / (double)maxDictSize; + if (ratio >= 10) { + return; + } + LOCALDISPLAYLEVEL(displayLevel, 1, + "WARNING: The maximum dictionary size %u is too large " + "compared to the source size %u! " + "size(source)/size(dictionary) = %f, but it should be >= " + "10! This may lead to a subpar dictionary! We recommend " + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! \n", (U32)maxDictSize, + (U32)nbDmers, ratio); +} + +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, + U32 nbDmers, U32 k, U32 passes) +{ + const U32 minEpochSize = k * 10; + COVER_epoch_info_t epochs; + epochs.num = MAX(1, maxDictSize / k / passes); + epochs.size = nbDmers / epochs.num; + if (epochs.size >= minEpochSize) { + assert(epochs.size * epochs.num <= nbDmers); + return epochs; + } + epochs.size = MIN(minEpochSize, nbDmers); + epochs.num = nbDmers / epochs.size; + assert(epochs.size * epochs.num <= nbDmers); + return epochs; +} + +/** + * Given the prepared context build the dictionary. + */ +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, void *dictBuffer, + size_t dictBufferCapacity, + ZDICT_cover_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); + const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = COVER_selectSegment( + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content. 
+     * There may be new content in other epochs, so we continue for some time.
+     */
+    if (segment.score == 0) {
+      if (++zeroScoreRun >= maxZeroScoreRun) {
+        break;
+      }
+      continue;
+    }
+    zeroScoreRun = 0;
+    /* Trim the segment if necessary and if it is too small then we are done */
+    segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
+    if (segmentSize < parameters.d) {
+      break;
+    }
+    /* We fill the dictionary from the back to allow the best segments to be
+     * referenced with the smallest offsets.
+     */
+    tail -= segmentSize;
+    memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
+    DISPLAYUPDATE(
+        2, "\r%u%% ",
+        (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
+  }
+  DISPLAYLEVEL(2, "\r%79s\r", "");
+  return tail;
+}
+
+ZDICTLIB_STATIC_API size_t ZDICT_trainFromBuffer_cover(
+    void *dictBuffer, size_t dictBufferCapacity,
+    const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
+    ZDICT_cover_params_t parameters)
+{
+  BYTE* const dict = (BYTE*)dictBuffer;
+  COVER_ctx_t ctx;
+  COVER_map_t activeDmers;
+  parameters.splitPoint = 1.0;
+  /* Initialize global data */
+  g_displayLevel = (int)parameters.zParams.notificationLevel;
+  /* Checks */
+  if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
+    DISPLAYLEVEL(1, "Cover parameters incorrect\n");
+    return ERROR(parameter_outOfBound);
+  }
+  if (nbSamples == 0) {
+    DISPLAYLEVEL(1, "Cover must have at least one input file\n");
+    return ERROR(srcSize_wrong);
+  }
+  if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
+    DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
+                 ZDICT_DICTSIZE_MIN);
+    return ERROR(dstSize_tooSmall);
+  }
+  /* Initialize context and activeDmers */
+  {
+    size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+                      parameters.d, parameters.splitPoint);
+    if (ZSTD_isError(initVal)) {
+      return initVal;
+    }
+  }
+  COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
+  if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
+    DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
+    COVER_ctx_destroy(&ctx);
+    return ERROR(memory_allocation);
+  }
+
+  DISPLAYLEVEL(2, "Building dictionary\n");
+  {
+    const size_t tail =
+        COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
+                              dictBufferCapacity, parameters);
+    const size_t dictionarySize = ZDICT_finalizeDictionary(
+        dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
+        samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
+    if (!ZSTD_isError(dictionarySize)) {
+      DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
+                   (unsigned)dictionarySize);
+    }
+    COVER_ctx_destroy(&ctx);
+    COVER_map_destroy(&activeDmers);
+    return dictionarySize;
+  }
+}
+
+
+
+size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
+                                      const size_t *samplesSizes, const BYTE *samples,
+                                      size_t *offsets,
+                                      size_t nbTrainSamples, size_t nbSamples,
+                                      BYTE *const dict, size_t dictBufferCapacity) {
+  size_t totalCompressedSize = ERROR(GENERIC);
+  /* Pointers */
+  ZSTD_CCtx *cctx;
+  ZSTD_CDict *cdict;
+  void *dst;
+  /* Local variables */
+  size_t dstCapacity;
+  size_t i;
+  /* Allocate dst with enough space to compress the maximum sized sample */
+  {
+    size_t maxSampleSize = 0;
+    i = parameters.splitPoint < 1.0 ?
nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); + } + dstCapacity = ZSTD_compressBound(maxSampleSize); + dst = malloc(dstCapacity); + } + /* Create the cctx and cdict */ + cctx = ZSTD_createCCtx(); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); + if (!dst || !cctx || !cdict) { + goto _compressCleanup; + } + /* Compress each sample and sum their sizes (or error) */ + totalCompressedSize = dictBufferCapacity; + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + const size_t size = ZSTD_compress_usingCDict( + cctx, dst, dstCapacity, samples + offsets[i], + samplesSizes[i], cdict); + if (ZSTD_isError(size)) { + totalCompressedSize = size; + goto _compressCleanup; + } + totalCompressedSize += size; + } +_compressCleanup: + ZSTD_freeCCtx(cctx); + ZSTD_freeCDict(cdict); + if (dst) { + free(dst); + } + return totalCompressedSize; +} + + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best) { + if (best==NULL) return; /* compatible with init on NULL */ + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); + (void)ZSTD_pthread_cond_init(&best->cond, NULL); + best->liveJobs = 0; + best->dict = NULL; + best->dictSize = 0; + best->compressedSize = (size_t)-1; + memset(&best->parameters, 0, sizeof(best->parameters)); +} + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + while (best->liveJobs != 0) { + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); + } + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best) { + if (!best) { + return; + } + COVER_best_wait(best); + if (best->dict) { + free(best->dict); + } + ZSTD_pthread_mutex_destroy(&best->mutex); + ZSTD_pthread_cond_destroy(&best->cond); +} + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + ++best->liveJobs; + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. 
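+ *
+ * Illustrative pairing of the COVER_best_* calls (editorial sketch of how
+ * this file actually drives them) :
+ *
+ *     COVER_best_start(&best);           (before handing a job to the pool)
+ *     POOL_add(pool, &COVER_tryParameters, data);
+ *     COVER_best_wait(&best);            (blocks until liveJobs == 0 again;
+ *                                         every job ends in _finish())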
+ */ +void COVER_best_finish(COVER_best_t* best, + ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection) +{ + void* dict = selection.dictContent; + size_t compressedSize = selection.totalCompressedSize; + size_t dictSize = selection.dictSize; + if (!best) { + return; + } + { + size_t liveJobs; + ZSTD_pthread_mutex_lock(&best->mutex); + --best->liveJobs; + liveJobs = best->liveJobs; + /* If the new dictionary is better */ + if (compressedSize < best->compressedSize) { + /* Allocate space if necessary */ + if (!best->dict || best->dictSize < dictSize) { + if (best->dict) { + free(best->dict); + } + best->dict = malloc(dictSize); + if (!best->dict) { + best->compressedSize = ERROR(GENERIC); + best->dictSize = 0; + ZSTD_pthread_cond_signal(&best->cond); + ZSTD_pthread_mutex_unlock(&best->mutex); + return; + } + } + /* Save the dictionary, parameters, and size */ + if (dict) { + memcpy(best->dict, dict, dictSize); + best->dictSize = dictSize; + best->parameters = parameters; + best->compressedSize = compressedSize; + } + } + if (liveJobs == 0) { + ZSTD_pthread_cond_broadcast(&best->cond); + } + ZSTD_pthread_mutex_unlock(&best->mutex); + } +} + +static COVER_dictSelection_t setDictSelection(BYTE* buf, size_t s, size_t csz) +{ + COVER_dictSelection_t ds; + ds.dictContent = buf; + ds.dictSize = s; + ds.totalCompressedSize = csz; + return ds; +} + +COVER_dictSelection_t COVER_dictSelectionError(size_t error) { + return setDictSelection(NULL, 0, error); +} + +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { + return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); +} + +void COVER_dictSelectionFree(COVER_dictSelection_t selection){ + free(selection.dictContent); +} + +COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { + + size_t largestDict = 0; + size_t largestCompressed = 0; + BYTE* customDictContentEnd = customDictContent + dictContentSize; + + BYTE* largestDictbuffer = (BYTE*)malloc(dictBufferCapacity); + BYTE* candidateDictBuffer = (BYTE*)malloc(dictBufferCapacity); + double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; + + if (!largestDictbuffer || !candidateDictBuffer) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + /* Initial dictionary size and compressed size */ + memcpy(largestDictbuffer, customDictContent, dictContentSize); + dictContentSize = ZDICT_finalizeDictionary( + largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + largestDictbuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (params.shrinkDict == 0) { + free(candidateDictBuffer); + return setDictSelection(largestDictbuffer, dictContentSize, totalCompressedSize); + } + + largestDict = 
dictContentSize; + largestCompressed = totalCompressedSize; + dictContentSize = ZDICT_DICTSIZE_MIN; + + /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ + while (dictContentSize < largestDict) { + memcpy(candidateDictBuffer, largestDictbuffer, largestDict); + dictContentSize = ZDICT_finalizeDictionary( + candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + candidateDictBuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if ((double)totalCompressedSize <= (double)largestCompressed * regressionTolerance) { + free(largestDictbuffer); + return setDictSelection( candidateDictBuffer, dictContentSize, totalCompressedSize ); + } + dictContentSize *= 2; + } + dictContentSize = largestDict; + totalCompressedSize = largestCompressed; + free(candidateDictBuffer); + return setDictSelection( largestDictbuffer, dictContentSize, totalCompressedSize ); +} + +/** + * Parameters for COVER_tryParameters(). + */ +typedef struct COVER_tryParameters_data_s { + const COVER_ctx_t *ctx; + COVER_best_t *best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} COVER_tryParameters_data_t; + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
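+ *
+ * Editorial note: "owning" means the callee frees `opaque`. The caller
+ * malloc()s one COVER_tryParameters_data_t per attempt, hands it over and
+ * never touches it again, e.g. (sketch) :
+ *
+ *     COVER_tryParameters_data_t *data = malloc(sizeof(*data));
+ *     data->ctx = &ctx; data->best = &best;   (fill remaining fields too)
+ *     COVER_best_start(&best);
+ *     POOL_add(pool, &COVER_tryParameters, data);  (data now owned by the job)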
+ */ +static void COVER_tryParameters(void *opaque) +{ + /* Save parameters as local variables */ + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque; + const COVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Allocate space for hash table, dict, and freqs */ + COVER_map_t activeDmers; + BYTE* const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32)); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + goto _cleanup; + } + if (!dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); + /* Build the dictionary */ + { + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, + dictBufferCapacity, parameters); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + COVER_map_destroy(&activeDmers); + COVER_dictSelectionFree(selection); + free(freqs); +} + +ZDICTLIB_STATIC_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters) +{ + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 
40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + COVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( + sizeof(COVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + COVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = *parameters; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &COVER_tryParameters, data); + } else { + COVER_tryParameters(data); + } + /* Print status */ + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (unsigned)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + COVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the 
output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + *parameters = best.parameters; + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.h b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.h new file mode 100644 index 0000000..a5d7506 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/cover.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif + +#include "../common/threading.h" /* ZSTD_pthread_mutex_t */ +#include "../common/mem.h" /* U32, BYTE */ +#include "../zdict.h" + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. + */ +typedef struct COVER_best_s { + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + size_t liveJobs; + void *dict; + size_t dictSize; + ZDICT_cover_params_t parameters; + size_t compressedSize; +} COVER_best_t; + +/** + * A segment is a range in the source as well as the score of the segment. + */ +typedef struct { + U32 begin; + U32 end; + U32 score; +} COVER_segment_t; + +/** + *Number of epochs and size of each epoch. + */ +typedef struct { + U32 num; + U32 size; +} COVER_epoch_info_t; + +/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + +/** + * Computes the number of epochs and the size of each epoch. + * We will make sure that each epoch gets at least 10 * k bytes. + * + * The COVER algorithms divide the data up into epochs of equal size and + * select one segment from each epoch. + * + * @param maxDictSize The maximum allowed dictionary size. + * @param nbDmers The number of dmers we are training on. + * @param k The parameter k (segment size). + * @param passes The target number of passes over the dmer corpus. + * More passes means a better dictionary. + */ +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, + U32 k, U32 passes); + +/** + * Warns the user when their corpus is too small. + */ +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); + +/** + * Checks total compressed size of a dictionary + */ +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity); + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; + +/** + * Initialize the `COVER_best_t`. 
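+ *
+ * Illustrative lifecycle (editorial sketch of how the trainers use it) :
+ *
+ *     COVER_best_t best;
+ *     COVER_best_init(&best);
+ *     ... COVER_best_start() / COVER_best_finish() once per candidate ...
+ *     COVER_best_wait(&best);        (COVER_best_destroy() also waits)
+ *     COVER_best_destroy(&best);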
+ */ +void COVER_best_init(COVER_best_t *best); + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best); + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best); + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best); + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. + */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. + */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. + */ + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); diff --git a/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.c b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.c new file mode 100644 index 0000000..a2870fb --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.c @@ -0,0 +1,1913 @@ +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+/*- Compiler specifics -*/
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wshorten-64-to-32"
+#endif
+
+#if defined(_MSC_VER)
+# pragma warning(disable : 4244)
+# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
+#endif
+
+
+/*- Dependencies -*/
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "divsufsort.h"
+
+/*- Constants -*/
+#if defined(INLINE)
+# undef INLINE
+#endif
+#if !defined(INLINE)
+# define INLINE __inline
+#endif
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (256)
+#endif
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+#if defined(SS_INSERTIONSORT_THRESHOLD)
+# if SS_INSERTIONSORT_THRESHOLD < 1
+#  undef SS_INSERTIONSORT_THRESHOLD
+#  define SS_INSERTIONSORT_THRESHOLD (1)
+# endif
+#else
+# define SS_INSERTIONSORT_THRESHOLD (8)
+#endif
+#if defined(SS_BLOCKSIZE)
+# if SS_BLOCKSIZE < 0
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (0)
+# elif 32768 <= SS_BLOCKSIZE
+#  undef SS_BLOCKSIZE
+#  define SS_BLOCKSIZE (32767)
+# endif
+#else
+# define SS_BLOCKSIZE (1024)
+#endif
+/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
+#if SS_BLOCKSIZE == 0
+# define SS_MISORT_STACKSIZE (96)
+#elif SS_BLOCKSIZE <= 4096
+# define SS_MISORT_STACKSIZE (16)
+#else
+# define SS_MISORT_STACKSIZE (24)
+#endif
+#define SS_SMERGE_STACKSIZE (32)
+#define TR_INSERTIONSORT_THRESHOLD (8)
+#define TR_STACKSIZE (64)
+
+
+/*- Macros -*/
+#ifndef SWAP
+# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
+#endif /* SWAP */
+#ifndef MIN
+# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
+#endif /* MIN */
+#ifndef MAX
+# define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
+#endif /* MAX */
+#define STACK_PUSH(_a, _b, _c, _d)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+  } while(0)
+#define STACK_PUSH5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(ssize < STACK_SIZE);\
+    stack[ssize].a = (_a), stack[ssize].b = (_b),\
+    stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
+  } while(0)
+#define STACK_POP(_a, _b, _c, _d)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+  } while(0)
+#define STACK_POP5(_a, _b, _c, _d, _e)\
+  do {\
+    assert(0 <= ssize);\
+    if(ssize == 0) { return; }\
+    (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+    (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
+  } while(0)
+#define BUCKET_A(_c0) bucket_A[(_c0)]
+#if ALPHABET_SIZE == 256
+#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
+#else
+#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
+#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
+#endif
+
+
+/*- Private Functions -*/
+
+static const int lg_table[256]= {
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+  5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
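 * A suffix whose B* substring is already exhausted at the current depth
 * (PA[s] + depth >= PA[s + 1] + 1) is gathered on the left side and marked
 * by taking its bitwise complement, so later passes can skip it; the
 * remaining suffixes stay on the right side for further sorting.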
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? 
a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
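 * The left run [first, middle) is first swapped into the external buffer,
 * then merged with the right run [middle, last) back into place; elements
 * that compare equal are marked with a bitwise complement so duplicate
 * substrings can be recognized later.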
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? 
(a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; 
k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. 
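 * For ranges of at most 32 elements the median of three is used, for
 * ranges up to 512 elements the median of five, and for larger ranges a
 * pseudo-median of nine (the median of three medians of three).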
*/ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
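 * A suffix t in the middle group satisfies ISA[t] == v; whenever t + depth
 * appears in the already sorted left or right part, t can be placed in
 * order immediately and assigned its rank.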
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, 
first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
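 * Buckets (c0, c1) are processed from the last one backwards; the unused
 * middle part of SA serves as the merge buffer, and sorting starts at
 * depth 2 because all suffixes in a bucket already share their first two
 * characters.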
*/ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of type B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
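 * Each suffix s taken from the array induces its predecessor s - 1, which
 * is written at the current end of the (T[s - 1], c1) bucket; complemented
 * values mark suffixes whose own predecessor is of type A. This way all
 * type B suffixes come out in sorted order.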
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
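 * Same induced scan as in construct_BWT() above, except that whenever a
 * suffix position s is a multiple of (mod + 1), its location in SA is
 * also recorded in the indexes array.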
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. 
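 * The output is assembled as U[0] = T[n - 1], U[1..pidx] = B[0..pidx - 1]
 * and U[pidx + 1..n - 1] = B[pidx + 1..n - 1]; B[pidx] itself (the row of
 * the original string) is skipped, and the returned primary index is
 * pidx + 1.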
*/ + U[0] = T[n - 1]; + for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; } + for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; } + pidx += 1; + } else { + pidx = -2; + } + + free(bucket_B); + free(bucket_A); + if(A == NULL) { free(B); } + + return pidx; +} diff --git a/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.h b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.h new file mode 100644 index 0000000..5440994 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/divsufsort.h @@ -0,0 +1,67 @@ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. + * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/fastcover.c b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/fastcover.c new file mode 100644 index 0000000..a958eb3 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/fastcover.c @@ -0,0 +1,766 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. 
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
+#include "../common/mem.h" /* read */
+#include "../common/pool.h"
+#include "../common/threading.h"
+#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../compress/zstd_compress_internal.h" /* ZSTD_hash*() */
+#include "../zdict.h"
+#include "cover.h"
+
+
+/*-*************************************
+* Constants
+***************************************/
+/**
+* 32-bit indexes are used to reference samples, so sample size is limited
+* to 4 GB on 64-bit builds. For 32-bit builds we choose 1 GB: most 32-bit
+* platforms have 2 GB of user-mode addressable space, and since we allocate
+* one large contiguous buffer, 1 GB is already a high limit.
+*/
+#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
+#define FASTCOVER_MAX_F 31
+#define FASTCOVER_MAX_ACCEL 10
+#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
+#define DEFAULT_F 20
+#define DEFAULT_ACCEL 1
+
+
+/*-*************************************
+* Console display
+***************************************/
+#ifndef LOCALDISPLAYLEVEL
+static int g_displayLevel = 0;
+#endif
+#undef DISPLAY
+#define DISPLAY(...) \
+  { \
+    fprintf(stderr, __VA_ARGS__); \
+    fflush(stderr); \
+  }
+#undef LOCALDISPLAYLEVEL
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    DISPLAY(__VA_ARGS__); \
+  } /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+#undef DISPLAYLEVEL
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef LOCALDISPLAYUPDATE
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
+      g_time = clock(); \
+      DISPLAY(__VA_ARGS__); \
+    } \
+  }
+#undef DISPLAYUPDATE
+#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
+
+
+/*-*************************************
+* Hash Functions
+***************************************/
+/**
+ * Hash the d-byte value pointed to by p into an index of the 2^f-entry
+ * frequency vector.
+ */
+static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) {
+  if (d == 6) {
+    return ZSTD_hash6Ptr(p, f);
+  }
+  return ZSTD_hash8Ptr(p, f);
+}
+
+
+/*-*************************************
+* Acceleration
+***************************************/
+typedef struct {
+  unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */
+  unsigned skip;     /* Number of dmers skipped between each dmer counted in computeFrequency */
+} FASTCOVER_accel_t;
+
+
+static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = {
+  { 100, 0 },  /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */
+  { 100, 0 },  /* accel = 1 */
+  { 50, 1 },   /* accel = 2 */
+  { 34, 2 },   /* accel = 3 */
+  { 25, 3 },   /* accel = 4 */
+  { 20, 4 },   /* accel = 5 */
+  { 17, 5 },   /* accel = 6 */
+  { 14, 6 },   /* accel = 7 */
+  { 13, 7 },   /* accel = 8 */
+  { 11, 8 },   /* accel = 9 */
+  { 10, 9 },   /* accel = 10 */
+};
+
+
+/*-*************************************
+* Context
+***************************************/
+typedef struct {
+  const BYTE *samples;
+  size_t *offsets;
+  const size_t *samplesSizes;
+  size_t nbSamples;
+  size_t nbTrainSamples;
+  size_t nbTestSamples;
+  size_t nbDmers;
+  U32 *freqs;
+  unsigned d;
+  unsigned f;
+  FASTCOVER_accel_t accelParams;
+} FASTCOVER_ctx_t;
+
+
+/*-*************************************
+* Helper functions
+***************************************/
+/**
+ * Selects the best segment in an epoch.
+ * Segments are scored according to the function:
+ *
+ * Let F(d) be the frequency of all dmers with hash value d.
+ * Let S_i be the hash value of the dmer at position i of segment S which has length k.
+ *
+ *     Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
+ *
+ * Once the dmer with hash value d is in the dictionary we set F(d) = 0.
+ */
+static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
+                                              U32 *freqs, U32 begin, U32 end,
+                                              ZDICT_cover_params_t parameters,
+                                              U16* segmentFreqs) {
+  /* Constants */
+  const U32 k = parameters.k;
+  const U32 d = parameters.d;
+  const U32 f = ctx->f;
+  const U32 dmersInK = k - d + 1;
+
+  /* Try each segment (activeSegment) and save the best (bestSegment) */
+  COVER_segment_t bestSegment = {0, 0, 0};
+  COVER_segment_t activeSegment;
+
+  /* Reset the activeDmers in the segment */
+  /* The activeSegment starts at the beginning of the epoch. */
+  activeSegment.begin = begin;
+  activeSegment.end = begin;
+  activeSegment.score = 0;
+
+  /* Slide the activeSegment through the whole epoch.
+   * Save the best segment in bestSegment.
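   * The score is maintained incrementally: a dmer's frequency is added to
   * the score only on its first occurrence inside the window and subtracted
   * only on its last, so each step costs O(1) instead of rescoring all
   * k - d + 1 dmers of the window.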
+   */
+  while (activeSegment.end < end) {
+    /* Get hash value of current dmer */
+    const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
+
+    /* Add frequency of this index to score if this is the first occurrence of index in active segment */
+    if (segmentFreqs[idx] == 0) {
+      activeSegment.score += freqs[idx];
+    }
+    /* Increment end of segment and segmentFreqs */
+    activeSegment.end += 1;
+    segmentFreqs[idx] += 1;
+    /* If the window is now too large, drop the first position */
+    if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
+      /* Get hash value of the dmer to be eliminated from active segment */
+      const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
+      segmentFreqs[delIndex] -= 1;
+      /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
+      if (segmentFreqs[delIndex] == 0) {
+        activeSegment.score -= freqs[delIndex];
+      }
+      /* Increment start of segment */
+      activeSegment.begin += 1;
+    }
+
+    /* If this segment is the best so far, save it */
+    if (activeSegment.score > bestSegment.score) {
+      bestSegment = activeSegment;
+    }
+  }
+
+  /* Zero out rest of segmentFreqs array */
+  while (activeSegment.begin < end) {
+    const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
+    segmentFreqs[delIndex] -= 1;
+    activeSegment.begin += 1;
+  }
+
+  {
+    /* Zero the frequency of the hash value of each dmer covered by the chosen segment. */
+    U32 pos;
+    for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
+      const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d);
+      freqs[i] = 0;
+    }
+  }
+
+  return bestSegment;
+}
+
+
+static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters,
+                                     size_t maxDictSize, unsigned f,
+                                     unsigned accel) {
+  /* k, d, and f are required parameters */
+  if (parameters.d == 0 || parameters.k == 0) {
+    return 0;
+  }
+  /* d has to be 6 or 8 */
+  if (parameters.d != 6 && parameters.d != 8) {
+    return 0;
+  }
+  /* k <= maxDictSize */
+  if (parameters.k > maxDictSize) {
+    return 0;
+  }
+  /* d <= k */
+  if (parameters.d > parameters.k) {
+    return 0;
+  }
+  /* 0 < f <= FASTCOVER_MAX_F */
+  if (f > FASTCOVER_MAX_F || f == 0) {
+    return 0;
+  }
+  /* 0 < splitPoint <= 1 */
+  if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
+    return 0;
+  }
+  /* 0 < accel <= 10 */
+  if (accel > 10 || accel == 0) {
+    return 0;
+  }
+  return 1;
+}
+
+
+/**
+ * Clean up a context initialized with `FASTCOVER_ctx_init()`.
+ */
+static void
+FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx)
+{
+    if (!ctx) return;
+
+    free(ctx->freqs);
+    ctx->freqs = NULL;
+
+    free(ctx->offsets);
+    ctx->offsets = NULL;
+}
+
+
+/**
+ * Calculate the frequency of the hash value of each dmer in ctx->samples
+ */
+static void
+FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
+{
+    const unsigned f = ctx->f;
+    const unsigned d = ctx->d;
+    const unsigned skip = ctx->accelParams.skip;
+    const unsigned readLength = MAX(d, 8);
+    size_t i;
+    assert(ctx->nbTrainSamples >= 5);
+    assert(ctx->nbTrainSamples <= ctx->nbSamples);
+    for (i = 0; i < ctx->nbTrainSamples; i++) {
+        size_t start = ctx->offsets[i];  /* start of current dmer */
+        size_t const currSampleEnd = ctx->offsets[i+1];
+        while (start + readLength <= currSampleEnd) {
+            const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d);
+            freqs[dmerIndex]++;
+            start = start + skip + 1;
+        }
+    }
+}
+
+
+/**
+ * Prepare a context for dictionary building.
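+ * This splits the samples into training and testing sets, fills the
+ * per-sample offset table, and computes the frequency table of all 2^f
+ * dmer hash values in the training set.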
+ * The context is only dependent on the parameter `d` and can be used multiple
+ * times.
+ * Returns 0 on success, or an error code on failure.
+ * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
+ */
+static size_t
+FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
+                   const void* samplesBuffer,
+                   const size_t* samplesSizes, unsigned nbSamples,
+                   unsigned d, double splitPoint, unsigned f,
+                   FASTCOVER_accel_t accelParams)
+{
+    const BYTE* const samples = (const BYTE*)samplesBuffer;
+    const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
+    /* Split samples into testing and training sets */
+    const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
+    const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
+    const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
+    const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
+
+    /* Checks */
+    if (totalSamplesSize < MAX(d, sizeof(U64)) ||
+        totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
+        DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
+                     (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
+        return ERROR(srcSize_wrong);
+    }
+
+    /* Check if there are at least 5 training samples */
+    if (nbTrainSamples < 5) {
+        DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
+        return ERROR(srcSize_wrong);
+    }
+
+    /* Check if there is at least one testing sample */
+    if (nbTestSamples < 1) {
+        DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
+        return ERROR(srcSize_wrong);
+    }
+
+    /* Zero the context */
+    memset(ctx, 0, sizeof(*ctx));
+    DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
+                 (unsigned)trainingSamplesSize);
+    DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
+                 (unsigned)testSamplesSize);
+
+    ctx->samples = samples;
+    ctx->samplesSizes = samplesSizes;
+    ctx->nbSamples = nbSamples;
+    ctx->nbTrainSamples = nbTrainSamples;
+    ctx->nbTestSamples = nbTestSamples;
+    ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
+    ctx->d = d;
+    ctx->f = f;
+    ctx->accelParams = accelParams;
+
+    /* The offsets of each file */
+    ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t));
+    if (ctx->offsets == NULL) {
+        DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
+        FASTCOVER_ctx_destroy(ctx);
+        return ERROR(memory_allocation);
+    }
+
+    /* Fill offsets from the samplesSizes */
+    {   U32 i;
+        ctx->offsets[0] = 0;
+        assert(nbSamples >= 5);
+        for (i = 1; i <= nbSamples; ++i) {
+            ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
+        }
+    }
+
+    /* Initialize frequency array of size 2^f */
+    ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32));
+    if (ctx->freqs == NULL) {
+        DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
+        FASTCOVER_ctx_destroy(ctx);
+        return ERROR(memory_allocation);
+    }
+
+    DISPLAYLEVEL(2, "Computing frequencies\n");
+    FASTCOVER_computeFrequency(ctx->freqs, ctx);
+
+    return 0;
+}
+
+
+/**
+ * Given the prepared context, build the dictionary.
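+ * The dmers are divided into epochs and one segment is selected from each
+ * epoch in round-robin order with FASTCOVER_selectSegment(). Segments are
+ * written from the back of the dictionary buffer towards the front, and
+ * building stops when the buffer is full, when a trimmed segment becomes
+ * shorter than d, or after maxZeroScoreRun consecutive zero-score
+ * selections.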
+ */ +static size_t +FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, + U32* freqs, + void* dictBuffer, size_t dictBufferCapacity, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) +{ + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); + const size_t maxZeroScoreRun = 10; + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = FASTCOVER_selectSegment( + ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); + + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. + */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. + */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +/** + * Parameters for FASTCOVER_tryParameters(). + */ +typedef struct FASTCOVER_tryParameters_data_s { + const FASTCOVER_ctx_t* ctx; + COVER_best_t* best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} FASTCOVER_tryParameters_data_t; + + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
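 *
 * [Editor's note, not part of the imported zstd sources: "owning" means the
 *  callee, not the caller, frees the argument, which is what makes the
 *  fire-and-forget thread-pool dispatch safe. The calling pattern used below
 *  in ZDICT_optimizeTrainFromBuffer_fastCover() is:
 *
 *      data = malloc(sizeof(*data));                      caller allocates
 *      POOL_add(pool, &FASTCOVER_tryParameters, data);    or a direct call
 *      ... FASTCOVER_tryParameters() ends with free(data) ...
 *  ]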
+ */ +static void FASTCOVER_tryParameters(void* opaque) +{ + /* Save parameters as local variables */ + FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque; + const FASTCOVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16)); + /* Allocate space for hash table, dict, and freqs */ + BYTE *const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); + if (!segmentFreqs || !dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); + /* Build the dictionary */ + { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, + parameters, segmentFreqs); + + const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + free(segmentFreqs); + COVER_dictSelectionFree(selection); + free(freqs); +} + + +static void +FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, + ZDICT_cover_params_t* coverParams) +{ + coverParams->k = fastCoverParams.k; + coverParams->d = fastCoverParams.d; + coverParams->steps = fastCoverParams.steps; + coverParams->nbThreads = fastCoverParams.nbThreads; + coverParams->splitPoint = fastCoverParams.splitPoint; + coverParams->zParams = fastCoverParams.zParams; + coverParams->shrinkDict = fastCoverParams.shrinkDict; +} + + +static void +FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, + ZDICT_fastCover_params_t* fastCoverParams, + unsigned f, unsigned accel) +{ + fastCoverParams->k = coverParams.k; + fastCoverParams->d = coverParams.d; + fastCoverParams->steps = coverParams.steps; + fastCoverParams->nbThreads = coverParams.nbThreads; + fastCoverParams->splitPoint = coverParams.splitPoint; + fastCoverParams->f = f; + fastCoverParams->accel = accel; + fastCoverParams->zParams = coverParams.zParams; + fastCoverParams->shrinkDict = coverParams.shrinkDict; +} + + +ZDICTLIB_STATIC_API size_t +ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + FASTCOVER_ctx_t ctx; + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* Initialize global data */ + g_displayLevel = (int)parameters.zParams.notificationLevel; + /* Assign splitPoint and f if not provided */ + parameters.splitPoint = 1.0; + parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f; + parameters.accel = parameters.accel == 0 ? 
DEFAULT_ACCEL : parameters.accel; + /* Convert to cover parameter */ + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(parameters, &coverParams); + /* Checks */ + if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, + parameters.accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Assign corresponding FASTCOVER_accel_t to accelParams*/ + accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; + /* Initialize context */ + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + coverParams.d, parameters.splitPoint, parameters.f, + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); + /* Build the dictionary */ + DISPLAYLEVEL(2, "Building dictionary\n"); + { + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16)); + const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, + dictBufferCapacity, coverParams, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + FASTCOVER_ctx_destroy(&ctx); + free(segmentFreqs); + return dictionarySize; + } +} + + +ZDICTLIB_STATIC_API size_t +ZDICT_optimizeTrainFromBuffer_fastCover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters) +{ + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; + const unsigned accel = parameters->accel == 0 ? 
DEFAULT_ACCEL : parameters->accel; + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = (int)parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); + return ERROR(parameter_outOfBound); + } + if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(*parameters, &coverParams); + accelParams = FASTCOVER_defaultAccelParameters[accel]; + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + FASTCOVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( + sizeof(FASTCOVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + FASTCOVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = coverParams; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = (unsigned)g_displayLevel; + /* Check the parameters */ + if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, + data->ctx->f, accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &FASTCOVER_tryParameters, data); + } else { + 
+            FASTCOVER_tryParameters(data);
+        }
+        /* Print status */
+        LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
+                           (unsigned)((iteration * 100) / kIterations));
+        ++iteration;
+      }
+      COVER_best_wait(&best);
+      FASTCOVER_ctx_destroy(&ctx);
+    }
+    LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+    /* Fill the output buffer and parameters with output of the best parameters */
+    {
+      const size_t dictSize = best.dictSize;
+      if (ZSTD_isError(best.compressedSize)) {
+        const size_t compressedSize = best.compressedSize;
+        COVER_best_destroy(&best);
+        POOL_free(pool);
+        return compressedSize;
+      }
+      FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
+      memcpy(dictBuffer, best.dict, dictSize);
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return dictSize;
+    }
+
+}
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/zdict.c b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/zdict.c
new file mode 100644
index 0000000..82e999e
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/dictBuilder/zdict.c
@@ -0,0 +1,1133 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/*-**************************************
+*  Tuning parameters
+****************************************/
+#define MINRATIO 4   /* minimum nb of apparition to be selected in dictionary */
+#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
+
+
+/*-**************************************
+*  Compiler Options
+****************************************/
+/* Unix Large Files support (>4GB) */
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+#  ifndef _LARGEFILE_SOURCE
+#  define _LARGEFILE_SOURCE
+#  endif
+#elif ! defined(__LP64__)   /* No point defining Large file for 64 bit */
+#  ifndef _LARGEFILE64_SOURCE
+#  define _LARGEFILE64_SOURCE
+#  endif
+#endif
+
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdlib.h>          /* malloc, free */
+#include <string.h>          /* memset */
+#include <stdio.h>           /* fprintf, fopen, ftello64 */
+#include <time.h>            /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
+#include "../common/mem.h"           /* read */
+#include "../common/fse.h"           /* FSE_normalizeCount, FSE_writeNCount */
+#include "../common/huf.h"           /* HUF_buildCTable, HUF_writeCTable */
+#include "../common/zstd_internal.h" /* includes zstd.h */
+#include "../common/xxhash.h"        /* XXH64 */
+#include "../compress/zstd_compress_internal.h" /* ZSTD_loadCEntropy() */
+#include "../zdict.h"
+#include "divsufsort.h"
+#include "../common/bits.h"          /* ZSTD_NbCommonBytes */
+
+
+/*-*************************************
+*  Constants
+***************************************/
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define DICTLISTSIZE_DEFAULT 10000
+
+#define NOISELENGTH 32
+
+static const U32 g_selectivity_default = 9;
+
+
+/*-*************************************
+*  Console display
+***************************************/
+#undef  DISPLAY
+#define DISPLAY(...)         do { fprintf(stderr, __VA_ARGS__); fflush( stderr ); } while (0)
+#undef  DISPLAYLEVEL
+#define DISPLAYLEVEL(l, ...) do { if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); } } while (0)    /* 0 : no display;   1: errors;   2: default;  3: details;  4: debug */
+
+static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
+
+static void ZDICT_printHex(const void* ptr, size_t length)
+{
+    const BYTE* const b = (const BYTE*)ptr;
+    size_t u;
+    for (u=0; u<length; u++) {
+        BYTE c = b[u];
+        if (c<32 || c>126) c = '.';   /* non-printable char */
+        DISPLAY("%c", c);
+    }
+}
+
+
+/*-********************************************************
+*  Helper functions
+**********************************************************/
+unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
+
+const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dictBuffer + 4);
+}
+
+size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
+{
+    size_t headerSize;
+    if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
+
+    {   ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
+        U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
+        if (!bs || !wksp) {
+            headerSize = ERROR(memory_allocation);
+        } else {
+            ZSTD_reset_compressedBlockState(bs);
+            headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
+        }
+
+        free(bs);
+        free(wksp);
+    }
+
+    return headerSize;
+}
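/* [Editorial sketch, not part of the imported zstd sources. Typical use of
 * the inspection helpers above; `buf` and `len` are hypothetical:
 *
 *     unsigned const id = ZDICT_getDictID(buf, len);
 *     if (id == 0) { ... not a header-carrying zstd dictionary ... }
 *     {   size_t const hSize = ZDICT_getDictHeaderSize(buf, len);
 *         if (ZDICT_isError(hSize))
 *             DISPLAY("%s\n", ZDICT_getErrorName(hSize));
 *     }
 * ]
 */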
+
+/*-********************************************************
+*  Dictionary training functions
+**********************************************************/
+/*! ZDICT_count() :
+    Count the nb of common bytes between 2 pointers.
+    Note : this function presumes end of buffer followed by noisy guard band.
+*/
+static size_t ZDICT_count(const void* pIn, const void* pMatch)
+{
+    const char* const pStart = (const char*)pIn;
+    for (;;) {
+        size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
+        if (!diff) {
+            pIn = (const char*)pIn+sizeof(size_t);
+            pMatch = (const char*)pMatch+sizeof(size_t);
+            continue;
+        }
+        pIn = (const char*)pIn+ZSTD_NbCommonBytes(diff);
+        return (size_t)((const char*)pIn - pStart);
+    }
+}
+
+
+typedef struct {
+    U32 pos;
+    U32 length;
+    U32 savings;
+} dictItem;
+
+static void ZDICT_initDictItem(dictItem* d)
+{
+    d->pos = 1;
+    d->length = 0;
+    d->savings = (U32)(-1);
+}
+
+
+#define LLIMIT 64          /* heuristic determined experimentally */
+#define MINMATCHLENGTH 7   /* heuristic determined experimentally */
+static dictItem ZDICT_analyzePos(
+                       BYTE* doneMarks,
+                       const int* suffix, U32 start,
+                       const void* buffer, U32 minRatio, U32 notificationLevel)
+{
+    U32 lengthList[LLIMIT] = {0};
+    U32 cumulLength[LLIMIT] = {0};
+    U32 savings[LLIMIT] = {0};
+    const BYTE* b = (const BYTE*)buffer;
+    size_t maxLength = LLIMIT;
+    size_t pos = (size_t)suffix[start];
+    U32 end = start;
+    dictItem solution;
+
+    /* init */
+    memset(&solution, 0, sizeof(solution));
+    doneMarks[pos] = 1;
+
+    /* trivial repetition cases */
+    if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2))
+       ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3))
+       ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) {
+        /* skip and mark segment */
+        U16 const pattern16 = MEM_read16(b+pos+4);
+        U32 u, patternEnd = 6;
+        while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ;
+        if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++;
+        for (u=1; u<patternEnd; u++)
+            doneMarks[pos+u] = 1;
+        return solution;
+    }
+
+    /* look forward */
+    {   size_t length;
+        do {
+            end++;
+            length = ZDICT_count(b + pos, b + suffix[end]);
+        } while (length >= MINMATCHLENGTH);
+    }
+
+    /* look backward */
+    {   size_t length;
+        do {
+            length = ZDICT_count(b + pos, b + *(suffix+start-1));
+            if (length >=MINMATCHLENGTH) start--;
+        } while(length >= MINMATCHLENGTH);
+    }
+
+    /* exit if not found a minimum nb of repetitions */
+    if (end-start < minRatio) {
+        U32 idx;
+        for(idx=start; idx<end; idx++)
+            doneMarks[suffix[idx]] = 1;
+        return solution;
+    }
+
+    {   int i;
+        U32 mml;
+        U32 refinedStart = start;
+        U32 refinedEnd = end;
+
+        DISPLAYLEVEL(4, "\n");
+        DISPLAYLEVEL(4, "found %3u matches of length >= %i at pos %7u  ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos);
+        DISPLAYLEVEL(4, "\n");
+
+        for (mml = MINMATCHLENGTH ; ; mml++) {
+            BYTE currentChar = 0;
+            U32 currentCount = 0;
+            U32 currentID = refinedStart;
+            U32 id;
+            U32 selectedCount = 0;
+            U32 selectedID = currentID;
+            for (id =refinedStart; id < refinedEnd; id++) {
+                if (b[suffix[id] + mml] != currentChar) {
+                    if (currentCount > selectedCount) {
+                        selectedCount = currentCount;
+                        selectedID = currentID;
+                    }
+                    currentID = id;
+                    currentChar = b[ suffix[id] + mml];
+                    currentCount = 0;
+                }
+                currentCount ++;
+            }
+            if (currentCount > selectedCount) {  /* for last */
+                selectedCount = currentCount;
+                selectedID = currentID;
+            }
+
+            if (selectedCount < minRatio)
+                break;
+            refinedStart = selectedID;
+            refinedEnd = refinedStart + selectedCount;
+        }
+
+        /* evaluate gain based on new dict */
+        start = refinedStart;
+        pos = suffix[refinedStart];
+        end = start;
+        memset(lengthList, 0, sizeof(lengthList));
+
+        /* look forward */
+        {   size_t length;
+            do {
+                end++;
+                length = ZDICT_count(b + pos, b + suffix[end]);
+                if (length >= LLIMIT) length = LLIMIT-1;
+                lengthList[length]++;
+            } while (length >=MINMATCHLENGTH);
+        }
+
+        /* look backward */
+        {   size_t length = MINMATCHLENGTH;
+            while ((length >= MINMATCHLENGTH) & (start > 0)) {
+                length = ZDICT_count(b + pos, b + suffix[start - 1]);
+                if (length >= LLIMIT) length = LLIMIT - 1;
+                lengthList[length]++;
+                if (length >= MINMATCHLENGTH) start--;
+            }
+        }
+
+        /* largest useful length */
+        memset(cumulLength, 0, sizeof(cumulLength));
+        cumulLength[maxLength-1] = lengthList[maxLength-1];
+        for (i=(int)(maxLength-2); i>=0; i--)
+            cumulLength[i] = cumulLength[i+1] + lengthList[i];
+
+        for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break;
+        maxLength = i;
+
+        /* reduce maxLength in case of final into repetitive data */
+        {   U32 l = (U32)maxLength;
+            BYTE const c = b[pos + maxLength-1];
+            while (b[pos+l-2]==c) l--;
+            maxLength = l;
+        }
+        if (maxLength < MINMATCHLENGTH) return solution;   /* skip : no long-enough solution */
+
+        /* calculate savings */
+        savings[5] = 0;
+        for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
+            savings[i] = savings[i-1] + (lengthList[i] * (i-3));
+
+        DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f)  \n",
+                     (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / (double)maxLength);
+
+        solution.pos = (U32)pos;
+        solution.length = (U32)maxLength;
+        solution.savings = savings[maxLength];
+
+        /* mark positions done */
+        {   U32 id;
+            for (id=start; id<end; id++) {
+                U32 p, pEnd, length;
+                U32 const testedPos = (U32)suffix[id];
+                if (testedPos == pos)
+                    length = solution.length;
+                else {
+                    length = (U32)ZDICT_count(b+pos, b+testedPos);
+                    if (length > solution.length) length = solution.length;
+                }
+                pEnd = (U32)(testedPos + length);
+                for (p=testedPos; p<pEnd; p++) doneMarks[p] = 1;
+    }   }   }
+
+    return solution;
+}
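/* [Editorial sketch, not part of the imported zstd sources. ZDICT_count()
 * above compares size_t-sized words and applies ZSTD_NbCommonBytes() to the
 * XOR of the first differing pair. A byte-at-a-time equivalent, safe only
 * because samples are followed by a noisy guard band that guarantees a
 * mismatch, would be:
 *
 *     static size_t count_bytewise(const char* pIn, const char* pMatch)
 *     {
 *         size_t n = 0;
 *         while (pIn[n] == pMatch[n]) n++;   // guard band ends the scan
 *         return n;
 *     }
 * ]
 */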
+
+
+static int isIncluded(const void* in, const void* container, size_t length)
+{
+    const char* const ip = (const char*) in;
+    const char* const into = (const char*) container;
+    size_t u;
+
+    for (u=0; u<length; u++) {  /* works because end of buffer is a noisy guard band */
+        if (ip[u] != into[u]) break;
+    }
+
+    return u==length;
+}
+
+/*! ZDICT_tryMerge() :
+    check if dictItem can be merged, do it if possible
+    @return : id of destination elt, 0 if not merged
+*/
+static U32 ZDICT_tryMerge(dictItem* table, dictItem elt, U32 eltNbToSkip, const void* buffer)
+{
+    const U32 tableSize = table->pos;
+    const U32 eltEnd = elt.pos + elt.length;
+    const char* const buf = (const char*) buffer;
+
+    /* tail overlap */
+    U32 u; for (u=1; u<tableSize; u++) {
+        if (u==eltNbToSkip) continue;
+        if ((table[u].pos > elt.pos) && (table[u].pos <= eltEnd)) {  /* overlap, existing > new */
+            /* append */
+            U32 const addedLength = table[u].pos - elt.pos;
+            table[u].length += addedLength;
+            table[u].pos = elt.pos;
+            table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
+            table[u].savings += elt.length / 8;    /* rough approx bonus */
+            elt = table[u];
+            /* sort : improve rank */
+            while ((u>1) && (table[u-1].savings < elt.savings))
+                table[u] = table[u-1], u--;
+            table[u] = elt;
+            return u;
+    }   }
+
+    /* front overlap */
+    for (u=1; u<tableSize; u++) {
+        if (u==eltNbToSkip) continue;
+
+        if ((table[u].pos + table[u].length >= elt.pos) && (table[u].pos < elt.pos)) {  /* overlap, existing < new */
+            /* append */
+            int const addedLength = (int)eltEnd - (int)(table[u].pos + table[u].length);
+            table[u].savings += elt.length / 8;   /* rough approx bonus */
+            if (addedLength > 0) {   /* otherwise, elt fully included into existing */
+                table[u].length += addedLength;
+                table[u].savings += elt.savings * addedLength / elt.length;   /* rough approx */
+            }
+            /* sort : improve rank */
+            elt = table[u];
+            while ((u>1) && (table[u-1].savings < elt.savings))
+                table[u] = table[u-1], u--;
+            table[u] = elt;
+            return u;
+        }
+
+        if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) {
+            if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) {
+                size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 );
+                table[u].pos = elt.pos;
+                table[u].savings += (U32)(elt.savings * addedLength / elt.length);
+                table[u].length = MIN(elt.length, table[u].length + 1);
+                return u;
+            }
+        }
+    }
+
+    return 0;
+}
+
+
+static void ZDICT_removeDictItem(dictItem* table, U32 id)
+{
+    /* convention : table[0].pos stores nb of elts */
+    U32 const max = table[0].pos;
+    U32 u;
+    if (!id) return;   /* protection, should never happen */
+    for (u=id; u<max-1; u++)
+        table[u] = table[u+1];
+    table->pos--;
+}
+
+
+static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer)
+{
+    /* merge if possible */
+    U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer);
+    if (mergeId) {
+        U32 newMerge = 1;
+        while (newMerge) {
+            newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer);
+            if (newMerge) ZDICT_removeDictItem(table, mergeId);
+            mergeId = newMerge;
+        }
+        return;
+    }
+
+    /* insert */
+    {   U32 current;
+        U32 nextElt = table->pos;
+        if (nextElt >= maxSize) nextElt = maxSize-1;
+        current = nextElt-1;
+        while (table[current].savings < elt.savings) {
+            table[current+1] = table[current];
+            current--;
+        }
+        table[current+1] = elt;
+        table->pos = nextElt+1;
+    }
+}
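/* [Editorial note, not part of the imported zstd sources. dictList uses slot
 * 0 as a sentinel: its `pos` field stores the number of used elements and
 * its `savings` field is initialized to (U32)(-1) by ZDICT_initDictItem(),
 * so the sentinel always outranks real elements and stops the descending
 * insertion loop above. Illustrative walk-through (savings shown):
 *
 *     table:  [sentinel -1u] [900] [500]        insert elt with savings 700
 *     shift:  [sentinel -1u] [900] [   ] [500]
 *     place:  [sentinel -1u] [900] [700] [500], table->pos incremented
 * ]
 */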
+
+
+static U32 ZDICT_dictSize(const dictItem* dictList)
+{
+    U32 u, dictSize = 0;
+    for (u=1; u<dictList[0].pos; u++)
+        dictSize += dictList[u].length;
+    return dictSize;
+}
+
+
+static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
+                            const void* const buffer, size_t bufferSize,   /* buffer must end with noisy guard band */
+                            const size_t* fileSizes, unsigned nbFiles,
+                            unsigned minRatio, U32 notificationLevel)
+{
+    int* const suffix0 = (int*)malloc((bufferSize+2)*sizeof(*suffix0));
+    int* const suffix = suffix0+1;
+    U32* reverseSuffix = (U32*)malloc((bufferSize)*sizeof(*reverseSuffix));
+    BYTE* doneMarks = (BYTE*)malloc((bufferSize+16)*sizeof(*doneMarks));   /* +16 for overflow security */
+    U32* filePos = (U32*)malloc(nbFiles * sizeof(*filePos));
+    size_t result = 0;
+    clock_t displayClock = 0;
+    clock_t const refreshRate = CLOCKS_PER_SEC * 3 / 10;
+
+#   undef  DISPLAYUPDATE
+#   define DISPLAYUPDATE(l, ...) \
+        do { \
+            if (notificationLevel>=l) { \
+                if (ZDICT_clockSpan(displayClock) > refreshRate) { \
+                    displayClock = clock(); \
+                    DISPLAY(__VA_ARGS__); \
+                } \
+                if (notificationLevel>=4) fflush(stderr); \
+            } \
+        } while (0)
+
+    /* init */
+    DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
+    if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) {
+        result = ERROR(memory_allocation);
+        goto _cleanup;
+    }
+    if (minRatio < MINRATIO) minRatio = MINRATIO;
+    memset(doneMarks, 0, bufferSize+16);
+
+    /* limit sample set size (divsufsort limitation)*/
+    if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20));
+    while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles];
+
+    /* sort */
+    DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20));
+    {   int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0);
+        if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; }
+    }
+    suffix[bufferSize] = (int)bufferSize;   /* leads into noise */
+    suffix0[0] = (int)bufferSize;           /* leads into noise */
+    /* build reverse suffix sort */
+    {   size_t pos;
+        for (pos=0; pos < bufferSize; pos++)
+            reverseSuffix[suffix[pos]] = (U32)pos;
+        /* note filePos tracks borders between samples.
+           It's not used at this stage, but planned to become useful in a later update */
+        filePos[0] = 0;
+        for (pos=1; pos<nbFiles; pos++)
+            filePos[pos] = (U32)(filePos[pos-1] + fileSizes[pos-1]);
+    }
+
+    DISPLAYLEVEL(2, "finding patterns ... \n");
+    DISPLAYLEVEL(3, "minimum ratio : %u \n", minRatio);
+
+    {   U32 cursor; for (cursor=0; cursor < bufferSize; ) {
+            dictItem solution;
+            if (doneMarks[cursor]) { cursor++; continue; }
+            solution = ZDICT_analyzePos(doneMarks, suffix, reverseSuffix[cursor], buffer, minRatio, notificationLevel);
+            if (solution.length==0) { cursor++; continue; }
+            ZDICT_insertDictItem(dictList, dictListSize, solution, buffer);
+            cursor += solution.length;
+            DISPLAYUPDATE(2, "\r%4.2f %% \r", (double)cursor / (double)bufferSize * 100.0);
+    }   }
+
+_cleanup:
+    free(suffix0);
+    free(reverseSuffix);
+    free(doneMarks);
+    free(filePos);
+    return result;
+}
+
+
+static void ZDICT_fillNoise(void* buffer, size_t length)
+{
+    unsigned const prime1 = 2654435761U;
+    unsigned const prime2 = 2246822519U;
+    unsigned acc = prime1;
+    size_t p=0;
+    for (p=0; p<length; p++) {
+        acc *= prime2;
+        ((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
+    }
+}
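/* [Editorial note, not part of the imported zstd sources. The pseudo-random
 * bytes generated above are appended after the concatenated samples (see
 * ZDICT_trainFromBuffer_legacy() further below), so that match-length scans
 * such as ZDICT_count() always hit a mismatch before running off the buffer:
 *
 *     | sample 0 | sample 1 | ... | sample N-1 | NOISELENGTH noise bytes |
 * ]
 */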
+
+
+typedef struct
+{
+    ZSTD_CDict* dict;    /* dictionary */
+    ZSTD_CCtx* zc;       /* working context */
+    void* workPlace;     /* must be ZSTD_BLOCKSIZE_MAX allocated */
+} EStats_ress_t;
+
+#define MAXREPOFFSET 1024
+
+static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params,
+                              unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets,
+                              const void* src, size_t srcSize,
+                              U32 notificationLevel)
+{
+    size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog);
+    size_t cSize;
+
+    if (srcSize > blockSizeMax) srcSize = blockSizeMax;   /* protection vs large samples */
+    {   size_t const errorCode = ZSTD_compressBegin_usingCDict_deprecated(esr.zc, esr.dict);
+        if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
+
+    }
+    cSize = ZSTD_compressBlock_deprecated(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
+    if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; }
+
+    if (cSize) {  /* if == 0; block is not compressible */
+        const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc);
+
+        /* literals stats */
+        {   const BYTE* bytePtr;
+            for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
+                countLit[*bytePtr]++;
+        }
+
+        /* seqStats */
+        {   U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+            ZSTD_seqToCodes(seqStorePtr);
+
+            {   const BYTE* codePtr = seqStorePtr->ofCode;
+                U32 u;
+                for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
+            }
+
+            {   const BYTE* codePtr = seqStorePtr->mlCode;
+                U32 u;
+                for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
+            }
+
+            {   const BYTE* codePtr = seqStorePtr->llCode;
+                U32 u;
+                for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
+            }
+
+            if (nbSeq >= 2) { /* rep offsets */
+                const seqDef* const seq = seqStorePtr->sequencesStart;
+                U32 offset1 = seq[0].offBase - ZSTD_REP_NUM;
+                U32 offset2 = seq[1].offBase - ZSTD_REP_NUM;
+                if (offset1 >= MAXREPOFFSET) offset1 = 0;
+                if (offset2 >= MAXREPOFFSET) offset2 = 0;
+                repOffsets[offset1] += 3;
+                repOffsets[offset2] += 1;
+    }   }   }
+}
+
+static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
+{
+    size_t total=0;
+    unsigned u;
+    for (u=0; u<nbFiles; u++) total += fileSizes[u];
+    return total;
+}
+
+typedef struct { U32 offset; U32 count; } offsetCount_t;
+
+static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val, U32 count)
+{
+    U32 u;
+    table[ZSTD_REP_NUM].offset = val;
+    table[ZSTD_REP_NUM].count = count;
+    for (u=ZSTD_REP_NUM; u>0; u--) {
+        offsetCount_t tmp;
+        if (table[u-1].count >= table[u].count) break;
+        tmp = table[u-1];
+        table[u-1] = table[u];
+        table[u] = tmp;
+    }
+}
+
+/* ZDICT_flatLit() :
+ * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals.
+ * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode.
+ */
+static void ZDICT_flatLit(unsigned* countLit)
+{
+    int u;
+    for (u=1; u<256; u++) countLit[u] = 2;
+    countLit[0]   = 4;
+    countLit[253] = 1;
+    countLit[254] = 1;
+}
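/* [Editorial note, not part of the imported zstd sources. Rationale for
 * ZDICT_flatLit(): a perfectly flat histogram over 256 literals gives every
 * symbol an 8-bit code (256 = 2^8), a degenerate table that
 * HUF_writeCTable() treats as non-compressible (the maxNbBits==8 case
 * handled in ZDICT_analyzeEntropy() below). Making countLit[253] and
 * countLit[254] rarer and countLit[0] more frequent keeps the distribution
 * nearly flat but forces a 9-bit outlier (see the assert below), which is
 * encodable.]
 */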
+
+#define OFFCODE_MAX 30  /* only applicable to first block */
+static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
+                                   int compressionLevel,
+                             const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles,
+                             const void* dictBuffer, size_t dictBufferSize,
+                                   unsigned notificationLevel)
+{
+    unsigned countLit[256];
+    HUF_CREATE_STATIC_CTABLE(hufTable, 255);
+    unsigned offcodeCount[OFFCODE_MAX+1];
+    short offcodeNCount[OFFCODE_MAX+1];
+    U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB));
+    unsigned matchLengthCount[MaxML+1];
+    short matchLengthNCount[MaxML+1];
+    unsigned litLengthCount[MaxLL+1];
+    short litLengthNCount[MaxLL+1];
+    U32 repOffset[MAXREPOFFSET];
+    offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
+    EStats_ress_t esr = { NULL, NULL, NULL };
+    ZSTD_parameters params;
+    U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
+    size_t pos = 0, errorCode;
+    size_t eSize = 0;
+    size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
+    size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles);
+    BYTE* dstPtr = (BYTE*)dstBuffer;
+    U32 wksp[HUF_CTABLE_WORKSPACE_SIZE_U32];
+
+    /* init */
+    DEBUGLOG(4, "ZDICT_analyzeEntropy");
+    if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; }   /* too large dictionary */
+    for (u=0; u<256; u++) countLit[u] = 1;   /* any character must be described */
+    for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
+    for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1;
+    for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1;
+    memset(repOffset, 0, sizeof(repOffset));
+    repOffset[1] = repOffset[4] = repOffset[8] = 1;
+    memset(bestRepOffset, 0, sizeof(bestRepOffset));
+    if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT;
+    params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
+
+    esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
+    esr.zc = ZSTD_createCCtx();
+    esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
+    if (!esr.dict || !esr.zc || !esr.workPlace) {
+        eSize = ERROR(memory_allocation);
+        DISPLAYLEVEL(1, "Not enough memory \n");
+        goto _cleanup;
+    }
+
+    /* collect stats on all samples */
+    for (u=0; u<nbFiles; u++) {
+        ZDICT_countEStats(esr, &params,
+                          countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
+                         (const char*)srcBuffer + pos, fileSizes[u],
+                          notificationLevel);
+        pos += fileSizes[u];
+    }
+
+    if (notificationLevel >= 4) {
+        /* writeStats */
+        DISPLAYLEVEL(4, "Offset Code Frequencies : \n");
+        for (u=0; u<=offcodeMax; u++) {
+            DISPLAYLEVEL(4, "%2u :%7u \n", u, offcodeCount[u]);
+    }   }
+
+    /* analyze, build stats, starting with literals */
+    {   size_t maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
+        if (HUF_isError(maxNbBits)) {
+            eSize = maxNbBits;
+            DISPLAYLEVEL(1, " HUF_buildCTable error \n");
+            goto _cleanup;
+        }
+        if (maxNbBits==8) {  /* not compressible : will fail on HUF_writeCTable() */
+            DISPLAYLEVEL(2, "warning : pathological dataset : literals are not compressible : samples are noisy or too regular \n");
+            ZDICT_flatLit(countLit);  /* replace distribution by a fake "mostly flat but still compressible" distribution, that HUF_writeCTable() can encode */
+            maxNbBits = HUF_buildCTable_wksp(hufTable, countLit, 255, huffLog, wksp, sizeof(wksp));
+            assert(maxNbBits==9);
+        }
+        huffLog = (U32)maxNbBits;
+    }
+
+    /* looking for most common first offsets */
+    {   U32 offset;
+        for (offset=1; offset<MAXREPOFFSET; offset++)
+            ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]);
+    }
+    /* note : the result of this phase should be used to better appreciate the impact on statistics */
+
+    total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
+    errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax, /* useLowProbCount */ 1);
+    if (FSE_isError(errorCode)) {
+        eSize = errorCode;
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
+        goto _cleanup;
+    }
+    Offlog = (U32)errorCode;
+
+    total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
+    errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML, /* useLowProbCount */ 1);
+    if (FSE_isError(errorCode)) {
+        eSize = errorCode;
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
+        goto _cleanup;
+    }
+    mlLog = (U32)errorCode;
+
+    total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
+    errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL, /* useLowProbCount */ 1);
+    if (FSE_isError(errorCode)) {
+        eSize = errorCode;
+        DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
+        goto _cleanup;
+    }
+    llLog = (U32)errorCode;
+
+    /* write result to buffer */
+    {   size_t const hhSize = HUF_writeCTable_wksp(dstPtr, maxDstSize, hufTable, 255, huffLog, wksp, sizeof(wksp));
+        if (HUF_isError(hhSize)) {
+            eSize = hhSize;
+            DISPLAYLEVEL(1, "HUF_writeCTable error \n");
+            goto _cleanup;
+        }
+        dstPtr += hhSize;
+        maxDstSize -= hhSize;
+        eSize += hhSize;
+    }
+
+    {   size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
+        if (FSE_isError(ohSize)) {
+            eSize = ohSize;
+            DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
+            goto _cleanup;
+        }
+        dstPtr += ohSize;
+        maxDstSize -= ohSize;
+        eSize += ohSize;
+    }
+
+    {   size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
+        if (FSE_isError(mhSize)) {
+            eSize = mhSize;
+            DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
+            goto _cleanup;
+        }
+        dstPtr += mhSize;
+        maxDstSize -= mhSize;
+        eSize += mhSize;
+    }
+
+    {   size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
+        if (FSE_isError(lhSize)) {
+            eSize = lhSize;
+            DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
+            goto _cleanup;
+        }
+        dstPtr += lhSize;
+        maxDstSize -= lhSize;
+        eSize += lhSize;
+    }
+
+    if (maxDstSize<12) {
+        eSize = ERROR(dstSize_tooSmall);
+        DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
+        goto _cleanup;
+    }
+# if 0
+    MEM_writeLE32(dstPtr+0, bestRepOffset[0].offset);
+    MEM_writeLE32(dstPtr+4, bestRepOffset[1].offset);
+    MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
+#else
+    /* at this stage, we don't use the result of "most common first offset",
+     * as the impact of statistics is not properly evaluated */
+    MEM_writeLE32(dstPtr+0, repStartValue[0]);
+    MEM_writeLE32(dstPtr+4, repStartValue[1]);
+    MEM_writeLE32(dstPtr+8, repStartValue[2]);
+#endif
+    eSize += 12;
+
+_cleanup:
+    ZSTD_freeCDict(esr.dict);
+    ZSTD_freeCCtx(esr.zc);
+    free(esr.workPlace);
+    return eSize;
+}
+
+
+/**
+ * @returns the maximum repcode value
+ */
+static U32 ZDICT_maxRep(U32 const reps[ZSTD_REP_NUM])
+{
+    U32 maxRep = reps[0];
+    int r;
+    for (r = 1; r < ZSTD_REP_NUM; ++r)
+        maxRep = MAX(maxRep, reps[r]);
+    return maxRep;
+}
+
+size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
+                          const void* customDictContent, size_t dictContentSize,
+                          const void* samplesBuffer, const size_t* samplesSizes,
+                          unsigned nbSamples, ZDICT_params_t params)
+{
+    size_t hSize;
+#define HBUFFSIZE 256   /* should prove large enough for all entropy headers */
+    BYTE header[HBUFFSIZE];
+    int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
+    U32 const notificationLevel = params.notificationLevel;
+    /* The final dictionary content must be at least as large as the largest repcode */
+    size_t const minContentSize = (size_t)ZDICT_maxRep(repStartValue);
+    size_t paddingSize;
+
+    /* check conditions */
+    DEBUGLOG(4, "ZDICT_finalizeDictionary");
+    if (dictBufferCapacity < dictContentSize) return ERROR(dstSize_tooSmall);
+    if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) return ERROR(dstSize_tooSmall);
+
+    /* dictionary header */
+    MEM_writeLE32(header, ZSTD_MAGIC_DICTIONARY);
+    {   U64 const randomID = XXH64(customDictContent, dictContentSize, 0);
+        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+        U32 const dictID = params.dictID ? params.dictID : compliantID;
+        MEM_writeLE32(header+4, dictID);
+    }
+
+    /* entropy tables */
+    DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
+    DISPLAYLEVEL(2, "statistics ... \n");
+    {   size_t const eSize = ZDICT_analyzeEntropy(header+8, HBUFFSIZE-8,
+                                  compressionLevel,
+                                  samplesBuffer, samplesSizes, nbSamples,
+                                  customDictContent, dictContentSize,
+                                  notificationLevel);
+        if (ZDICT_isError(eSize)) return eSize;
+        hSize = eSize + 8;
+    }
+
+    /* Shrink the content size if it doesn't fit in the buffer */
+    if (hSize + dictContentSize > dictBufferCapacity) {
+        dictContentSize = dictBufferCapacity - hSize;
+    }
+
+    /* Pad the dictionary content with zeros if it is too small */
+    if (dictContentSize < minContentSize) {
+        RETURN_ERROR_IF(hSize + minContentSize > dictBufferCapacity, dstSize_tooSmall,
+                        "dictBufferCapacity too small to fit max repcode");
+        paddingSize = minContentSize - dictContentSize;
+    } else {
+        paddingSize = 0;
+    }
+
+    {
+        size_t const dictSize = hSize + paddingSize + dictContentSize;
+
+        /* The dictionary consists of the header, optional padding, and the content.
+         * The padding comes before the content because the "best" position in the
+         * dictionary is the last byte.
+         */
+        BYTE* const outDictHeader = (BYTE*)dictBuffer;
+        BYTE* const outDictPadding = outDictHeader + hSize;
+        BYTE* const outDictContent = outDictPadding + paddingSize;
+
+        assert(dictSize <= dictBufferCapacity);
+        assert(outDictContent + dictContentSize == (BYTE*)dictBuffer + dictSize);
+
+        /* First copy the customDictContent into its final location.
+         * `customDictContent` and `dictBuffer` may overlap, so we must
+         * do this before any other writes into the output buffer.
+         * Then copy the header & padding into the output buffer.
+         */
+        memmove(outDictContent, customDictContent, dictContentSize);
+        memcpy(outDictHeader, header, hSize);
+        memset(outDictPadding, 0, paddingSize);
+
+        return dictSize;
+    }
+}
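/* [Editorial sketch, not part of the imported zstd sources. Typical call of
 * the public entry point above, with hypothetical buffers: `raw`/`rawSize`
 * hold caller-chosen dictionary content, `samples`/`sampleSizes`/`nbSamples`
 * describe the training data:
 *
 *     ZDICT_params_t zp;
 *     memset(&zp, 0, sizeof(zp));   // default level, auto dictID
 *     {   size_t const dSize = ZDICT_finalizeDictionary(out, outCapacity,
 *                 raw, rawSize, samples, sampleSizes, nbSamples, zp);
 *         if (ZDICT_isError(dSize)) { ... }
 *     }
 * ]
 */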
+
+
+static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
+        void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
+        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+        ZDICT_params_t params)
+{
+    int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel;
+    U32 const notificationLevel = params.notificationLevel;
+    size_t hSize = 8;
+
+    /* calculate entropy tables */
+    DISPLAYLEVEL(2, "\r%70s\r", "");   /* clean display line */
+    DISPLAYLEVEL(2, "statistics ... \n");
+    {   size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize,
+                                  compressionLevel,
+                                  samplesBuffer, samplesSizes, nbSamples,
+                                  (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize,
+                                  notificationLevel);
+        if (ZDICT_isError(eSize)) return eSize;
+        hSize += eSize;
+    }
+
+    /* add dictionary header (after entropy tables) */
+    MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY);
+    {   U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0);
+        U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768;
+        U32 const dictID = params.dictID ? params.dictID : compliantID;
+        MEM_writeLE32((char*)dictBuffer+4, dictID);
+    }
+
+    if (hSize + dictContentSize < dictBufferCapacity)
+        memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize);
+    return MIN(dictBufferCapacity, hSize+dictContentSize);
+}
+
+/*! ZDICT_trainFromBuffer_unsafe_legacy() :
+*   Warning : `samplesBuffer` must be followed by noisy guard band !!!
+*   @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
+*/
+static size_t ZDICT_trainFromBuffer_unsafe_legacy(
+                            void* dictBuffer, size_t maxDictSize,
+                            const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                            ZDICT_legacy_params_t params)
+{
+    U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
+    dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
+    unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel;
+    unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity;
+    size_t const targetDictSize = maxDictSize;
+    size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
+    size_t dictSize = 0;
+    U32 const notificationLevel = params.zParams.notificationLevel;
+
+    /* checks */
+    if (!dictList) return ERROR(memory_allocation);
+    if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); }   /* requested dictionary size is too small */
+    if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* not enough source to create dictionary */
+
+    /* init */
+    ZDICT_initDictItem(dictList);
+
+    /* build dictionary */
+    ZDICT_trainBuffer_legacy(dictList, dictListSize,
+                             samplesBuffer, samplesBuffSize,
+                             samplesSizes, nbSamples,
+                             minRep, notificationLevel);
+
+    /* display best matches */
+    if (params.zParams.notificationLevel>= 3) {
+        unsigned const nb = MIN(25, dictList[0].pos);
+        unsigned const dictContentSize = ZDICT_dictSize(dictList);
+        unsigned u;
+        DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize);
+        DISPLAYLEVEL(3, "list %u best segments \n", nb-1);
+        for (u=1; u<nb; u++) {
+            unsigned const pos = dictList[u].pos;
+            unsigned const length = dictList[u].length;
+            U32 const printedLength = MIN(40, length);
+            if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
+                free(dictList);
+                return ERROR(GENERIC);   /* should never happen */
+            }
+            DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
+                         u, length, pos, (unsigned)dictList[u].savings);
+            ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
+            DISPLAYLEVEL(3, "| \n");
+    }   }
+
+
+    /* create dictionary */
+    {   unsigned dictContentSize = ZDICT_dictSize(dictList);
+        if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); }   /* dictionary content too small */
+        if (dictContentSize < targetDictSize/4) {
+            DISPLAYLEVEL(2, "!  warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize);
+            if (samplesBuffSize < 10 * targetDictSize)
+                DISPLAYLEVEL(2, "!  consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20));
+            if (minRep > MINRATIO) {
+                DISPLAYLEVEL(2, "!  consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
+                DISPLAYLEVEL(2, "!  note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
+            }
+        }
+
+        if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
+            unsigned proposedSelectivity = selectivity-1;
+            while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
+            DISPLAYLEVEL(2, "!  note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize);
+            DISPLAYLEVEL(2, "!  consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
+            DISPLAYLEVEL(2, "!  always test dictionary efficiency on real samples \n");
+        }
+
+        /* limit dictionary size */
+        {   U32 const max = dictList->pos;   /* convention : nb of useful elts within dictList */
+            U32 currentSize = 0;
+            U32 n; for (n=1; n<max; n++) {
+                currentSize += dictList[n].length;
+                if (currentSize > targetDictSize) { currentSize -= dictList[n].length; break; }
+            }
+            dictList->pos = n;
+            dictContentSize = currentSize;
+        }
+
+        /* build dict content */
+        {   U32 u;
+            BYTE* ptr = (BYTE*)dictBuffer + maxDictSize;
+            for (u=1; u<dictList->pos; u++) {
+                U32 l = dictList[u].length;
+                ptr -= l;
+                if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); }   /* should not happen */
+                memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l);
+        }   }
+
+        dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
+                                                             samplesBuffer, samplesSizes, nbSamples,
+                                                             params.zParams);
+    }
+
+    /* clean up */
+    free(dictList);
+    return dictSize;
+}
+
+
+/* ZDICT_trainFromBuffer_legacy() :
+ * issue : samplesBuffer need to be followed by a noisy guard band.
+ * work around : duplicate the buffer, and add the noise */
+size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
+                                    const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+                                    ZDICT_legacy_params_t params)
+{
+    size_t result;
+    void* newBuff;
+    size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
+    if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0;   /* not enough content => no dictionary */
+
+    newBuff = malloc(sBuffSize + NOISELENGTH);
+    if (!newBuff) return ERROR(memory_allocation);
+
+    memcpy(newBuff, samplesBuffer, sBuffSize);
+    ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH);   /* guard band, for end of buffer condition */
+
+    result =
+        ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
+                                            samplesSizes, nbSamples, params);
+    free(newBuff);
+    return result;
+}
+
+
+size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
+                             const void* samplesBuffer,
+                             const size_t* samplesSizes, unsigned nbSamples)
+{
+    ZDICT_fastCover_params_t params;
+    DEBUGLOG(3, "ZDICT_trainFromBuffer");
+    memset(&params, 0, sizeof(params));
+    params.d = 8;
+    params.steps = 4;
+    /* Use default level since no compression level information is available */
+    params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
+    params.zParams.notificationLevel = DEBUGLEVEL;
+#endif
+    return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
+                                                   samplesBuffer, samplesSizes, nbSamples,
+                                                   &params);
+}
+
+size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
+                                        const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
+{
+    ZDICT_params_t params;
+    memset(&params, 0, sizeof(params));
+    return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity,
+                                                     samplesBuffer, samplesSizes, nbSamples,
+                                                     params);
+}
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_legacy.h b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_legacy.h
new file mode 100644
index 0000000..7a8a04e
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_legacy.h
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include "../common/mem.h" /* MEM_STATIC */ +#include "../common/error_private.h" /* ERROR */ +#include "../common/zstd_internal.h" /* ZSTD_inBuffer, ZSTD_outBuffer, ZSTD_frameSizeInfo */ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +# include "zstd_v01.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +# include "zstd_v02.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +# include "zstd_v03.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) +# include "zstd_v04.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +# include "zstd_v05.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +# include "zstd_v06.h" +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +# include "zstd_v07.h" +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + char x; + /* Avoid passing NULL to legacy decoding. 
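       [Editor's note, not part of the imported zstd sources: the one-byte
        stack dummy `x` below is never dereferenced; each substitution is
        paired with an assert that the corresponding size is 0. It merely
        hands the legacy decoders a non-NULL pointer for zero-length buffers.]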
*/ + if (dst == NULL) { + assert(dstCapacity == 0); + dst = &x; + } + if (src == NULL) { + assert(compressedSize == 0); + src = &x; + } + if (dict == NULL) { + assert(dictSize == 0); + dict = &x; + } + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + 
frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + /* In all cases, decompressedBound == nbBlocks * ZSTD_BLOCKSIZE_MAX. + * So we can compute nbBlocks without having to change every function. + */ + if (frameSizeInfo.decompressedBound != ZSTD_CONTENTSIZE_ERROR) { + assert((frameSizeInfo.decompressedBound & (ZSTD_BLOCKSIZE_MAX - 1)) == 0); + frameSizeInfo.nbBlocks = (size_t)(frameSizeInfo.decompressedBound / ZSTD_BLOCKSIZE_MAX); + } + return frameSizeInfo; +} + +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + char x; + /* Avoid passing NULL to legacy decoding. */ + if (dict == NULL) { + assert(dictSize == 0); + dict = &x; + } + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + static char x; + /* Avoid passing NULL to legacy decoding. 
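       [Editor's note, not part of the imported zstd sources: each ZBUFFvXX
        branch below advances output->pos and input->pos by the amounts the
        legacy streaming decoder produced and consumed, then forwards its
        return value, which ZBUFF documents as a hint of the preferred input
        size for the next call, with 0 conventionally meaning the frame is
        finished, matching ZSTD_decompressStream() conventions.]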
*/ + if (output->dst == NULL) { + assert(output->size == 0); + output->dst = &x; + } + if (input->src == NULL) { + assert(input->size == 0); + input->src = &x; + } + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ diff --git a/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.c b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.c new file mode 100644 index 0000000..6cf5123 --- /dev/null +++ b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.c @@ -0,0 +1,2127 @@ +/* + * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */
+
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v01.h"
+#include "../common/compiler.h"
+#include "../common/error_private.h"
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_DTABLE_SIZE_U32(maxTableLog)   (1 + (1<<maxTableLog))
+
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
+/* FSE_MAX_SYMBOL_VALUE :
+*  Maximum symbol value authorized.
+*  Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)        /* disable: C4214: non-int bitfields */
+#else
+#  define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>    /* malloc, free, qsort */
+#include <string.h>    /* memcpy, memset */
+#include <stdio.h>     /* printf (debug) */
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# include <stdint.h>
+typedef  uint8_t BYTE;
+typedef uint16_t U16;
+typedef  int16_t S16;
+typedef uint32_t U32;
+typedef  int32_t S32;
+typedef uint64_t U64;
+typedef  int64_t S64;
+#else
+typedef unsigned char       BYTE;
+typedef unsigned short      U16;
+typedef   signed short      S16;
+typedef unsigned int        U32;
+typedef   signed int        S32;
+typedef unsigned long long  U64;
+typedef   signed long long  S64;
+#endif
+
+#endif /* MEM_ACCESS_MODULE */
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+
+static unsigned FSE_32bits(void)
+{
+    return sizeof(void*)==4;
+}
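/* [Editorial note, not part of the imported zstd sources. The next helper
 * uses the classic union probe for byte order: store 1 into the U32 member,
 * then read back the first byte; 1 means little-endian. The same idea in
 * isolation:
 *
 *     const union { unsigned u; unsigned char c[4]; } probe = { 1 };
 *     int const isLittleEndian = probe.c[0];   // 1 on LE, 0 on BE
 * ]
 */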
const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+}
+
+static U16 FSE_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U32 FSE_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U64 FSE_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+static U16 FSE_readLE16(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+static U32 FSE_readLE32(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+
+static U64 FSE_readLE64(const void* memPtr)
+{
+    if (FSE_isLittleEndian())
+        return FSE_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                   + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+static size_t FSE_readLEST(const void* memPtr)
+{
+    if (FSE_32bits())
+        return (size_t)FSE_readLE32(memPtr);
+    else
+        return (size_t)FSE_readLE64(memPtr);
+}
+
+
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef struct
+{
+    int deltaFindState;
+    U32 deltaNbBits;
+} FSE_symbolCompressionTransform;   /* total 8 bytes */
+
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+/****************************************************************
+*  Internal functions
+****************************************************************/
+FORCE_INLINE unsigned FSE_highbit32 (U32 val)
+{
+#   if defined(_MSC_VER)   /* Visual */
+    unsigned long r;
+    return _BitScanReverse(&r, val) ?
(unsigned)r : 0;
+#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   /* GCC Intrinsic */
+    return __builtin_clz (val) ^ 31;
+#   else   /* Software version */
+    static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
+    U32 v = val;
+    unsigned r;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
+    return r;
+#   endif
+}
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)(ptr) + 1;   /* because dt is unsigned, 32-bits aligned on 32-bits */
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return (size_t)-FSE_ERROR_maxSymbolValue_tooLarge;
+    if (tableLog > FSE_MAX_TABLELOG) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+
+    /* Init, lay down lowprob symbols */
+    DTableH[0].tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return (size_t)-FSE_ERROR_GENERIC;   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - FSE_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH->fastMode = (U16)noLarge;
+    return 0;
+}
+
+
+/******************************************
+*  FSE byte symbol
+******************************************/
+#ifndef FSE_COMMONDEFS_ONLY
+
+static unsigned FSE_isError(size_t code) { return (code > (size_t)(-FSE_ERROR_maxCode)); }
+
+static short FSE_abs(short a)
+{
+    return a<0?
-a : a;
+}
+
+
+/****************************************************************
+*  Header bitstream management
+****************************************************************/
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return (size_t)-FSE_ERROR_srcSize_wrong;
+    bitStream = FSE_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return (size_t)-FSE_ERROR_tableLog_tooLarge;
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = FSE_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return (size_t)-FSE_ERROR_maxSymbolValue_tooSmall;
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = FSE_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = FSE_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return (size_t)-FSE_ERROR_GENERIC;
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    return ip-istart;
+}
+
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return
(size_t)-FSE_ERROR_GENERIC; /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s<=maxSymbolValue; s++) + { + dinfo[s].newState = 0; + dinfo[s].symbol = (BYTE)s; + dinfo[s].nbBits = (BYTE)nbBits; + } + + return 0; +} + + +/* FSE_initDStream + * Initialize a FSE_DStream_t. + * srcBuffer must point at the beginning of an FSE block. + * The function result is the size of the FSE_block (== srcSize). + * If srcSize is too small, the function will return an errorCode; + */ +static size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) return (size_t)-FSE_ERROR_srcSize_wrong; + + if (srcSize >= sizeof(size_t)) + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = FSE_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + /* fallthrough */ + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + /* fallthrough */ + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + /* fallthrough */ + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + /* fallthrough */ + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + /* fallthrough */ + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + /* fallthrough */ + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return (size_t)-FSE_ERROR_GENERIC; /* stop bit not present */ + bitD->bitsConsumed = 8 - FSE_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + + +/*!FSE_lookBits + * Provides next n bits from the bitContainer. + * bitContainer is not modified (bits are still present for next read/look) + * On 32-bits, maxNbBits==25 + * On 64-bits, maxNbBits==57 + * return : value extracted. + */ +static size_t FSE_lookBits(FSE_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +static size_t FSE_lookBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! */ +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +static void FSE_skipBits(FSE_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + + +/*!FSE_readBits + * Read next n bits from the bitContainer. + * On 32-bits, don't read more than maxNbBits==25 + * On 64-bits, don't read more than maxNbBits==57 + * Use the fast variant *only* if n >= 1. + * return : value extracted. + */ +static size_t FSE_readBits(FSE_DStream_t* bitD, U32 nbBits) +{ + size_t value = FSE_lookBits(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static size_t FSE_readBitsFast(FSE_DStream_t* bitD, U32 nbBits) /* only if nbBits >= 1 !! 
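 (editorial note : with nbBits==0 the final shift in FSE_lookBitsFast would be by 0 rather than by the full container width, leaking stale bits instead of returning 0; FSE_lookBits splits its shift into ">> 1" then ">> ((bitMask-nbBits) & bitMask)" precisely so that the nbBits==0 case stays correct)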
*/ +{ + size_t value = FSE_lookBitsFast(bitD, nbBits); + FSE_skipBits(bitD, nbBits); + return value; +} + +static unsigned FSE_reloadDStream(FSE_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return FSE_DStream_tooFar; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = FSE_readLEST(bitD->ptr); + return FSE_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return FSE_DStream_endOfBuffer; + return FSE_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + U32 result = FSE_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = FSE_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = FSE_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + + +static void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = FSE_readBits(bitD, DTableH->tableLog); + FSE_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +static BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = FSE_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +static BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = FSE_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/* FSE_endOfDStream + Tells if bitD has reached end of bitStream or not */ + +static unsigned FSE_endOfDStream(const FSE_DStream_t* bitD) +{ + return ((bitD->ptr == bitD->start) && (bitD->bitsConsumed == sizeof(bitD->bitContainer)*8)); +} + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +FORCE_INLINE size_t FSE_decompress_usingDTable_generic( + void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt, const unsigned fast) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const omax = op + maxDstSize; + BYTE* const olimit = omax-3; + + FSE_DStream_t bitD; + FSE_DState_t state1; + FSE_DState_t state2; + size_t errorCode; + + /* Init */ + errorCode = FSE_initDStream(&bitD, cSrc, cSrcSize); /* replaced last arg by maxCompressed Size */ + if (FSE_isError(errorCode)) return errorCode; + + FSE_initDState(&state1, &bitD, dt); + FSE_initDState(&state2, &bitD, dt); + +#define FSE_GETSYMBOL(statePtr) fast ? 
FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (FSE_reloadDStream(&bitD)==FSE_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)   /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)   /* This test must be static */
+            { if (FSE_reloadDStream(&bitD) > FSE_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)   /* This test must be static */
+            FSE_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : FSE_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly FSE_DStream_completed */
+    while (1)
+    {
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (FSE_reloadDStream(&bitD)>FSE_DStream_completed) || (op==omax) || (FSE_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ? */
+    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2))
+        return op-ostart;
+
+    if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+    return (size_t)-FSE_ERROR_corruptionDetected;
+}
+
+
+static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
+                                         const void* cSrc, size_t cSrcSize,
+                                         const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));   /* memcpy() into local variable, to avoid strict aliasing warning */
+
+    /* select fast mode (static) */
+    if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
+    return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
+}
+
+
+static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    const BYTE* const istart = (const BYTE*)cSrc;
+    const BYTE* ip = istart;
+    short counting[FSE_MAX_SYMBOL_VALUE+1];
+    DTable_max_t dt;   /* Static analyzer seems unable to understand this table will be properly initialized later */
+    unsigned tableLog;
+    unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
+    size_t errorCode;
+
+    if (cSrcSize<2) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+
+    /* normal FSE decoding mode */
+    errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;   /* too small input size */
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog);
+    if (FSE_isError(errorCode)) return errorCode;
+
+    /* always return, even if it is an error code */
+    return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt);
+}
+
+
+
+/* *******************************************************
+*  Huff0 : Huffman block compression
+*********************************************************/
+#define HUF_MAX_SYMBOL_VALUE 255
+#define HUF_DEFAULT_TABLELOG  12       /* used by default, when not specified */
+#define HUF_MAX_TABLELOG  12           /* max possible tableLog; for allocation purpose; can be modified */
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG.
Beyond that value, code does not work */
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+typedef struct HUF_CElt_s {
+  U16  val;
+  BYTE nbBits;
+} HUF_CElt ;
+
+typedef struct nodeElt_s {
+    U32 count;
+    U16 parent;
+    BYTE byte;
+    BYTE nbBits;
+} nodeElt;
+
+
+/* *******************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct {
+    BYTE byte;
+    BYTE nbBits;
+} HUF_DElt;
+
+static size_t HUF_readDTable (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 weightTotal;
+    U32 maxBits;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DElt* const dt = (HUF_DElt*)ptr;
+
+    if (!srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    iSize = ip[0];
+
+    FSE_STATIC_ASSERT(sizeof(HUF_DElt) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* should not be necessary, but some analyzer complain ... */
+    if (iSize >= 128)   /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, sizeof(huffWeight));
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else   /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+        oSize = FSE_decompress(huffWeight, HUF_MAX_SYMBOL_VALUE, ip+1, iSize);   /* max 255 values decoded, last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankVal, 0, sizeof(rankVal));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return (size_t)-FSE_ERROR_corruptionDetected;
+        rankVal[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return (size_t)-FSE_ERROR_corruptionDetected;
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    maxBits = FSE_highbit32(weightTotal) + 1;
+    if (maxBits > DTable[0]) return (size_t)-FSE_ERROR_tableLog_tooLarge;   /* DTable is too small */
+    DTable[0] = (U16)maxBits;
+    {
+        U32 total = 1 << maxBits;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << FSE_highbit32(rest);
+        U32 lastWeight = FSE_highbit32(rest) + 1;
+        if (verif != rest) return (size_t)-FSE_ERROR_corruptionDetected;   /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankVal[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankVal[1] < 2) || (rankVal[1] & 1)) return (size_t)-FSE_ERROR_corruptionDetected;   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=maxBits; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<=oSize; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DElt D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(maxBits + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+
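        /* editorial note : canonical Huffman fill -- a symbol of weight w occupies
+           length = 1<<(w-1) consecutive cells of the (1<<maxBits)-entry table and is
+           decoded by consuming nbBits = maxBits+1-w bits; rankVal[w] tracks the next
+           free cell for that weight */
+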
}
+
+    return iSize+1;
+}
+
+
+static BYTE HUF_decodeSymbol(FSE_DStream_t* Dstream, const HUF_DElt* dt, const U32 dtLog)
+{
+    const size_t val = FSE_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
+    const BYTE c = dt[val].byte;
+    FSE_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+static size_t HUF_decompress_usingDTable(   /* -3% slower when non static */
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 6) return (size_t)-FSE_ERROR_srcSize_wrong;
+    {
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* op = ostart;
+        BYTE* const omax = op + maxDstSize;
+        BYTE* const olimit = maxDstSize < 15 ? op : omax-15;
+
+        const void* ptr = DTable;
+        const HUF_DElt* const dt = (const HUF_DElt*)(ptr)+1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+        U32 reloadStatus;
+
+        /* Init */
+
+        const U16* jumpTable = (const U16*)cSrc;
+        const size_t length1 = FSE_readLE16(jumpTable);
+        const size_t length2 = FSE_readLE16(jumpTable+1);
+        const size_t length3 = FSE_readLE16(jumpTable+2);
+        const size_t length4 = cSrcSize - 6 - length1 - length2 - length3;   /* check coherency !! */
+        const char* const start1 = (const char*)(cSrc) + 6;
+        const char* const start2 = start1 + length1;
+        const char* const start3 = start2 + length2;
+        const char* const start4 = start3 + length3;
+        FSE_DStream_t bitD1, bitD2, bitD3, bitD4;
+
+        if (length1+length2+length3+6 >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+
+        errorCode = FSE_initDStream(&bitD1, start1, length1);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD2, start2, length2);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD3, start3, length3);
+        if (FSE_isError(errorCode)) return errorCode;
+        errorCode = FSE_initDStream(&bitD4, start4, length4);
+        if (FSE_isError(errorCode)) return errorCode;
+
+        reloadStatus=FSE_reloadDStream(&bitD2);
+
+        /* 16 symbols per loop */
+        for ( ; (reloadStatus<FSE_DStream_completed) && (op<olimit);   /* D2-3-4 are supposed to be synchronized and finish together */
+            op+=16, reloadStatus = FSE_reloadDStream(&bitD2) | FSE_reloadDStream(&bitD3) | FSE_reloadDStream(&bitD4), FSE_reloadDStream(&bitD1))
+        {
+  #define HUF_DECODE_SYMBOL_0(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog);
+
+  #define HUF_DECODE_SYMBOL_1(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits() && (HUF_MAX_TABLELOG>12)) FSE_reloadDStream(&Dstream)
+
+  #define HUF_DECODE_SYMBOL_2(n, Dstream) \
+            op[n] = HUF_decodeSymbol(&Dstream, dt, dtLog); \
+            if (FSE_32bits()) FSE_reloadDStream(&Dstream)
+
+            HUF_DECODE_SYMBOL_1( 0, bitD1);
+            HUF_DECODE_SYMBOL_1( 1, bitD2);
+            HUF_DECODE_SYMBOL_1( 2, bitD3);
+            HUF_DECODE_SYMBOL_1( 3, bitD4);
+            HUF_DECODE_SYMBOL_2( 4, bitD1);
+            HUF_DECODE_SYMBOL_2( 5, bitD2);
+            HUF_DECODE_SYMBOL_2( 6, bitD3);
+            HUF_DECODE_SYMBOL_2( 7, bitD4);
+            HUF_DECODE_SYMBOL_1( 8, bitD1);
+            HUF_DECODE_SYMBOL_1( 9, bitD2);
+            HUF_DECODE_SYMBOL_1(10, bitD3);
+            HUF_DECODE_SYMBOL_1(11, bitD4);
+            HUF_DECODE_SYMBOL_0(12, bitD1);
+            HUF_DECODE_SYMBOL_0(13, bitD2);
+            HUF_DECODE_SYMBOL_0(14, bitD3);
+            HUF_DECODE_SYMBOL_0(15, bitD4);
+        }
+
+        if (reloadStatus!=FSE_DStream_completed)   /* not complete : some bitStream might be FSE_DStream_unfinished */
+            return (size_t)-FSE_ERROR_corruptionDetected;
+
+        /* tail */
+        {
+            /* bitTail = bitD1; */   /* *much* slower : -20% !??! */
+            FSE_DStream_t bitTail;
+            bitTail.ptr = bitD1.ptr;
+            bitTail.bitsConsumed = bitD1.bitsConsumed;
+            bitTail.bitContainer = bitD1.bitContainer;   /* required in case of FSE_DStream_endOfBuffer */
+            bitTail.start = start1;
+            for ( ; (FSE_reloadDStream(&bitTail) < FSE_DStream_completed) && (op<omax) ; op++)
+            {
+                HUF_DECODE_SYMBOL_0(0, bitTail);
+            }
+
+            if (FSE_endOfDStream(&bitTail))
+                return op-ostart;
+        }
+
+        if (op==omax) return (size_t)-FSE_ERROR_dstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */
+
+        return (size_t)-FSE_ERROR_corruptionDetected;
+    }
+}
+
+
+static size_t HUF_decompress (void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLE(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTable (DTable, cSrc, cSrcSize);
+    if (FSE_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return (size_t)-FSE_ERROR_srcSize_wrong;
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, DTable);
+}
+
+
+#endif /* FSE_COMMONDEFS_ONLY */
+
+/*
+    zstd - standard compression library
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/****************************************************************
+*  Tuning parameters
+*****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
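+*  (editorial note : with ZSTD_MEMORY_USAGE 17 below, HASH_LOG = 17-2 = 15, so the match-finder hash table of cctxi_t occupies (1<<15) * 4 bytes = 2^17 bytes = 128 KB in either branch)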
+* Increasing memory usage improves compression ratio
+* Reduced memory usage can improve speed, due to cache effect */
+#define ZSTD_MEMORY_USAGE 17
+
+
+/**************************************
+   CPU Feature Detection
+**************************************/
+/*
+ * Automated efficient unaligned memory access detection
+ * Based on known hardware architectures
+ * This list will be updated thanks to feedbacks
+ */
+#if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
+    || defined(__ARM_FEATURE_UNALIGNED) \
+    || defined(__i386__) || defined(__x86_64__) \
+    || defined(_M_IX86) || defined(_M_X64) \
+    || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
+    || (defined(_M_ARM) && (_M_ARM >= 7))
+#  define ZSTD_UNALIGNED_ACCESS 1
+#else
+#  define ZSTD_UNALIGNED_ACCESS 0
+#endif
+
+
+/********************************************************
+*  Includes
+*********************************************************/
+#include <stdlib.h>      /* calloc */
+#include <string.h>      /* memcpy, memmove */
+#include <stdio.h>       /* debug : printf */
+
+
+/********************************************************
+*  Compiler specifics
+*********************************************************/
+#ifdef __AVX2__
+#  include <immintrin.h>   /* AVX2 intrinsics */
+#endif
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  include <intrin.h>                    /* For Visual 2005 */
+#  pragma warning(disable : 4127)        /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4324)        /* disable: C4324: padded structure */
+#endif
+
+
+#ifndef MEM_ACCESS_MODULE
+#define MEM_ACCESS_MODULE
+/********************************************************
+*  Basic Types
+*********************************************************/
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h>   /* intptr_t */
+# endif
+typedef uint8_t  BYTE;
+typedef uint16_t U16;
+typedef int16_t  S16;
+typedef uint32_t U32;
+typedef int32_t  S32;
+typedef uint64_t U64;
+#else
+typedef unsigned char  BYTE;
+typedef unsigned short U16;
+typedef signed short   S16;
+typedef unsigned int   U32;
+typedef signed int     S32;
+typedef unsigned long long U64;
+#endif
+
+#endif /* MEM_ACCESS_MODULE */
+
+
+/********************************************************
+*  Constants
+*********************************************************/
+static const U32 ZSTD_magicNumber = 0xFD2FB51E;   /* 3rd version : seqNb header */
+
+#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
+#define HASH_TABLESIZE (1 << HASH_LOG)
+#define HASH_MASK (HASH_TABLESIZE - 1)
+
+#define KNUTH 2654435761
+
+#define BIT7 128
+#define BIT6  64
+#define BIT5  32
+#define BIT4  16
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define BLOCKSIZE (128 KB)                 /* define, for static allocation */
+
+#define WORKPLACESIZE (BLOCKSIZE*3)
+#define MINMATCH 4
+#define MLbits   7
+#define LLbits   6
+#define Offbits  5
+#define MaxML  ((1<<MLbits) - 1)
+#define MaxLL  ((1<<LLbits) - 1)
+#define MaxOff ((1<<Offbits)- 1)
+#define LitFSELog  11
+#define MLFSELog   10
+#define LLFSELog   10
+#define OffFSELog   9
+#define MAX(a,b) ((a)<(b)?(b):(a))
+#define MaxSeq MAX(MaxLL, MaxML)
+
+#define LITERAL_NOENTROPY 63
+#define COMMAND_NOENTROPY 7   /* to remove */
+
+#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
+
+static const size_t ZSTD_blockHeaderSize = 3;
+static const size_t ZSTD_frameHeaderSize = 4;
+
+
+/********************************************************
+*  Memory operations
+*********************************************************/
+static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
+
+static unsigned ZSTD_isLittleEndian(void)
+{
+    const union { U32 i; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+}
+
+static U16    ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
+
+static void   ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
+
+static void   ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
+
+#define COPY8(d,s)    { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/*! ZSTD_wildcopy : custom version of memcpy(), can copy up to 7-8 bytes too many */
+static void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    BYTE* const oend = op + length;
+    do COPY8(op, ip) while (op < oend);
+}
+
+static U16 ZSTD_readLE16(const void* memPtr)
+{
+    if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)((U16)p[0] + ((U16)p[1]<<8));
+    }
+}
+
+static U32 ZSTD_readLE24(const void* memPtr)
+{
+    return ZSTD_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
+static U32 ZSTD_readBE32(const void* memPtr)
+{
+    const BYTE* p = (const BYTE*)memPtr;
+    return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
+}
+
+
+/**************************************
+*  Local structures
+**************************************/
+typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
+
+typedef struct
+{
+    blockType_t blockType;
+    U32 origSize;
+} blockProperties_t;
+
+typedef struct {
+    void* buffer;
+    U32*  offsetStart;
+    U32*  offset;
+    BYTE* offCodeStart;
+    BYTE* offCode;
+    BYTE* litStart;
+    BYTE* lit;
+    BYTE* litLengthStart;
+    BYTE* litLength;
+    BYTE* matchLengthStart;
+    BYTE* matchLength;
+    BYTE* dumpsStart;
+    BYTE* dumps;
+} seqStore_t;
+
+typedef struct
+{
+    const BYTE* base;
+    U32 current;
+    U32 nextUpdate;
+    seqStore_t seqStore;
+#ifdef __AVX2__
+    __m256i hashTable[HASH_TABLESIZE>>3];
+#else
+    U32 hashTable[HASH_TABLESIZE];
+#endif
+    BYTE buffer[WORKPLACESIZE];
+} cctxi_t;
+
+
+
+
+/**************************************
+*  Error Management
+**************************************/
+/* published entry point */
+unsigned ZSTDv01_isError(size_t code) { return ERR_isError(code); }
+
+
+/**************************************
+*  Tool functions
+**************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes */
+#define ZSTD_VERSION_MINOR    1    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  3    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 +
ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/************************************************************** +* Decompression code +**************************************************************/ + +static size_t ZSTDv01_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) +{ + const BYTE* const in = (const BYTE* const)src; + BYTE headerFlags; + U32 cSize; + + if (srcSize < 3) return ERROR(srcSize_wrong); + + headerFlags = *in; + cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16); + + bpPtr->blockType = (blockType_t)(headerFlags >> 6); + bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0; + + if (bpPtr->blockType == bt_end) return 0; + if (bpPtr->blockType == bt_rle) return 1; + return cSize; +} + + +static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + if (srcSize > maxDstSize) return ERROR(dstSize_tooSmall); + if (srcSize > 0) { + memcpy(dst, src, srcSize); + } + return srcSize; +} + + +static size_t ZSTD_decompressLiterals(void* ctx, + void* dst, size_t maxDstSize, + const void* src, size_t srcSize) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + maxDstSize; + const BYTE* ip = (const BYTE*)src; + size_t errorCode; + size_t litSize; + + /* check : minimum 2, for litSize, +1, for content */ + if (srcSize <= 3) return ERROR(corruption_detected); + + litSize = ip[1] + (ip[0]<<8); + litSize += ((ip[-3] >> 3) & 7) << 16; /* mmmmh.... */ + op = oend - litSize; + + (void)ctx; + if (litSize > maxDstSize) return ERROR(dstSize_tooSmall); + errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); + if (FSE_isError(errorCode)) return ERROR(GENERIC); + return litSize; +} + + +static size_t ZSTDv01_decodeLiteralsBlock(void* ctx, + void* dst, size_t maxDstSize, + const BYTE** litStart, size_t* litSize, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + BYTE* const ostart = (BYTE* const)dst; + BYTE* const oend = ostart + maxDstSize; + blockProperties_t litbp; + + size_t litcSize = ZSTDv01_getcBlockSize(src, srcSize, &litbp); + if (ZSTDv01_isError(litcSize)) return litcSize; + if (litcSize > srcSize - ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + ip += ZSTD_blockHeaderSize; + + switch(litbp.blockType) + { + case bt_raw: + *litStart = ip; + ip += litcSize; + *litSize = litcSize; + break; + case bt_rle: + { + size_t rleSize = litbp.origSize; + if (rleSize>maxDstSize) return ERROR(dstSize_tooSmall); + if (!srcSize) return ERROR(srcSize_wrong); + if (rleSize > 0) { + memset(oend - rleSize, *ip, rleSize); + } + *litStart = oend - rleSize; + *litSize = rleSize; + ip++; + break; + } + case bt_compressed: + { + size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize); + if (ZSTDv01_isError(decodedLitSize)) return decodedLitSize; + *litStart = oend - decodedLitSize; + *litSize = decodedLitSize; + ip += litcSize; + break; + } + case bt_end: + default: + return ERROR(GENERIC); + } + + return ip-istart; +} + + +static size_t ZSTDv01_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr, size_t* dumpsLengthPtr, + FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE* const)src; + const BYTE* ip = istart; + const BYTE* const iend = istart + srcSize; + U32 LLtype, Offtype, MLtype; + U32 LLlog, Offlog, MLlog; + size_t dumpsLength; + + /* check */ + if (srcSize < 5) return ERROR(srcSize_wrong); + + /* SeqHead */ + *nbSeq = ZSTD_readLE16(ip); 
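 /* layout (editorial note) : 2-byte little-endian sequence count, then one byte packing the three symbol-table modes (bits 7-6 literal lengths, 5-4 offsets, 3-2 match lengths; 0 = FSE-compressed, 1 = raw, 2 = RLE per blockType_t), then the "dumps" length : the next two bytes (high byte first) when bit 1 is set, else one byte extended by bit 0 as a ninth bit */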
ip+=2; + LLtype = *ip >> 6; + Offtype = (*ip >> 4) & 3; + MLtype = (*ip >> 2) & 3; + if (*ip & 2) + { + dumpsLength = ip[2]; + dumpsLength += ip[1] << 8; + ip += 3; + } + else + { + dumpsLength = ip[1]; + dumpsLength += (ip[0] & 1) << 8; + ip += 2; + } + *dumpsPtr = ip; + ip += dumpsLength; + *dumpsLengthPtr = dumpsLength; + + /* check */ + if (ip > iend-3) return ERROR(srcSize_wrong); /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */ + + /* sequences */ + { + S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */ + size_t headerSize; + + /* Build DTables */ + switch(LLtype) + { + case bt_rle : + LLlog = 0; + FSE_buildDTable_rle(DTableLL, *ip++); break; + case bt_raw : + LLlog = LLbits; + FSE_buildDTable_raw(DTableLL, LLbits); break; + default : + { U32 max = MaxLL; + headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (LLlog > LLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableLL, norm, max, LLlog); + } } + + switch(Offtype) + { + case bt_rle : + Offlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableOffb, *ip++); break; + case bt_raw : + Offlog = Offbits; + FSE_buildDTable_raw(DTableOffb, Offbits); break; + default : + { U32 max = MaxOff; + headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (Offlog > OffFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableOffb, norm, max, Offlog); + } } + + switch(MLtype) + { + case bt_rle : + MLlog = 0; + if (ip > iend-2) return ERROR(srcSize_wrong); /* min : "raw", hence no header, but at least xxLog bits */ + FSE_buildDTable_rle(DTableML, *ip++); break; + case bt_raw : + MLlog = MLbits; + FSE_buildDTable_raw(DTableML, MLbits); break; + default : + { U32 max = MaxML; + headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip); + if (FSE_isError(headerSize)) return ERROR(GENERIC); + if (MLlog > MLFSELog) return ERROR(corruption_detected); + ip += headerSize; + FSE_buildDTable(DTableML, norm, max, MLlog); + } } } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t offset; + size_t matchLength; +} seq_t; + +typedef struct { + FSE_DStream_t DStream; + FSE_DState_t stateLL; + FSE_DState_t stateOffb; + FSE_DState_t stateML; + size_t prevOffset; + const BYTE* dumps; + const BYTE* dumpsEnd; +} seqState_t; + + +static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState) +{ + size_t litLength; + size_t prevOffset; + size_t offset; + size_t matchLength; + const BYTE* dumps = seqState->dumps; + const BYTE* const de = seqState->dumpsEnd; + + /* Literal length */ + litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream)); + prevOffset = litLength ? 
seq->offset : seqState->prevOffset;
+    seqState->prevOffset = seq->offset;
+    if (litLength == MaxLL)
+    {
+        const U32 add = dumps<de ? *dumps++ : 0;
+        if (add < 255) litLength += add;
+        else
+        {
+            if (dumps<=(de-3))
+            {
+                litLength = ZSTD_readLE24(dumps);
+                dumps += 3;
+            }
+        }
+    }
+
+    /* Offset */
+    {
+        U32 offsetCode, nbBits;
+        offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        nbBits = offsetCode - 1;
+        if (offsetCode==0) nbBits = 0;   /* cmove */
+        offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
+        if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
+        if (offsetCode==0) offset = prevOffset;
+    }
+
+    /* MatchLength */
+    matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
+    if (matchLength == MaxML)
+    {
+        const U32 add = dumps<de ? *dumps++ : 0;
+        if (add < 255) matchLength += add;
+        else
+        {
+            if (dumps<=(de-3))
+            {
+                matchLength = ZSTD_readLE24(dumps);
+                dumps += 3;
+            }
+        }
+    }
+    matchLength += MINMATCH;
+
+    /* save result */
+    seq->litLength = litLength;
+    seq->offset = offset;
+    seq->matchLength = matchLength;
+    seqState->dumps = dumps;
+}
+
+
+static size_t ZSTD_execSequence(BYTE* op,
+                                seq_t sequence,
+                                const BYTE** litPtr, const BYTE* const litLimit,
+                                BYTE* const base, BYTE* const oend)
+{
+    static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4};   /* added */
+    static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* subtracted */
+    const BYTE* const ostart = op;
+    BYTE* const oLitEnd = op + sequence.litLength;
+    const size_t litLength = sequence.litLength;
+    BYTE* const endMatch = op + litLength + sequence.matchLength;    /* risk : address space overflow (32-bits) */
+    const BYTE* const litEnd = *litPtr + litLength;
+
+    /* checks */
+    size_t const seqLength = sequence.litLength + sequence.matchLength;
+
+    if (seqLength > (size_t)(oend - op)) return ERROR(dstSize_tooSmall);
+    if (sequence.litLength > (size_t)(litLimit - *litPtr)) return ERROR(corruption_detected);
+    /* Now we know there are no overflow in literal nor match lengths, can use pointer checks */
+    if (sequence.offset > (U32)(oLitEnd - base)) return ERROR(corruption_detected);
+
+    if (endMatch > oend) return ERROR(dstSize_tooSmall);   /* overwrite beyond dst buffer */
+    if (litEnd > litLimit) return ERROR(corruption_detected);   /* overRead beyond lit buffer */
+    if (sequence.matchLength > (size_t)(*litPtr-op)) return ERROR(dstSize_tooSmall);   /* overwrite literal segment */
+
+    /* copy Literals */
+    ZSTD_memmove(op, *litPtr, sequence.litLength);   /* note : v0.1 seems to allow scenarios where output or input are close to end of buffer */
+
+    op += litLength;
+    *litPtr = litEnd;   /* update for next sequence */
+
+    /* check : last match must be at a minimum distance of 8 from end of dest buffer */
+    if (oend-op < 8) return ERROR(dstSize_tooSmall);
+
+    /* copy Match */
+    {
+        const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
+        const BYTE* match = op - sequence.offset;   /* possible underflow at op - offset ?
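 (editorial note : i.e. a match pointer before the start of the output buffer; the (match < base) corruption check just below rejects that case)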
*/
+        size_t qutt = 12;
+        U64 saved[2];
+
+        /* check */
+        if (match < base) return ERROR(corruption_detected);
+        if (sequence.offset > (size_t)base) return ERROR(corruption_detected);
+
+        /* save beginning of literal sequence, in case of write overlap */
+        if (overlapRisk)
+        {
+            if ((endMatch + qutt) > oend) qutt = oend-endMatch;
+            memcpy(saved, endMatch, qutt);
+        }
+
+        if (sequence.offset < 8)
+        {
+            const int dec64 = dec64table[sequence.offset];
+            op[0] = match[0];
+            op[1] = match[1];
+            op[2] = match[2];
+            op[3] = match[3];
+            match += dec32table[sequence.offset];
+            ZSTD_copy4(op+4, match);
+            match -= dec64;
+        } else { ZSTD_copy8(op, match); }
+        op += 8; match += 8;
+
+        if (endMatch > oend-(16-MINMATCH))
+        {
+            if (op < oend-8)
+            {
+                ZSTD_wildcopy(op, match, (oend-8) - op);
+                match += (oend-8) - op;
+                op = oend-8;
+            }
+            while (op<endMatch) *op++ = *match++;
+        }
+        else
+        {
+            ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8);   /* works even if matchLength < 8 */
+        }
+
+        /* restore, in case of overlap */
+        if (overlapRisk) memcpy(endMatch, saved, qutt);
+    }
+
+    return endMatch-ostart;
+}
+
+typedef struct ZSTDv01_Dctx_s
+{
+    U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
+    U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
+    U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
+    void* previousDstEnd;
+    void* base;
+    size_t expected;
+    blockType_t bType;
+    U32 phase;
+} dctx_t;
+
+
+static size_t ZSTD_decompressSequences(
+                               void* ctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize,
+                         const BYTE* litStart, size_t litSize)
+{
+    dctx_t* dctx = (dctx_t*)ctx;
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t errorCode, dumpsLength;
+    const BYTE* litPtr = litStart;
+    const BYTE* const litEnd = litStart + litSize;
+    int nbSeq;
+    const BYTE* dumps;
+    U32* DTableLL = dctx->LLTable;
+    U32* DTableML = dctx->MLTable;
+    U32* DTableOffb = dctx->OffTable;
+    BYTE* const base = (BYTE*) (dctx->base);
+
+    /* Build Decoding Tables */
+    errorCode = ZSTDv01_decodeSeqHeaders(&nbSeq, &dumps, &dumpsLength,
+                                      DTableLL, DTableML, DTableOffb,
+                                      ip, iend-ip);
+    if (ZSTDv01_isError(errorCode)) return errorCode;
+    ip += errorCode;
+
+    /* Regen sequences */
+    {
+        seq_t sequence;
+        seqState_t seqState;
+
+        memset(&sequence, 0, sizeof(sequence));
+        seqState.dumps = dumps;
+        seqState.dumpsEnd = dumps + dumpsLength;
+        seqState.prevOffset = 1;
+        errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
+        if (FSE_isError(errorCode)) return ERROR(corruption_detected);
+        FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
+        FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
+        FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
+
+        for ( ; (FSE_reloadDStream(&(seqState.DStream)) <= FSE_DStream_completed) && (nbSeq>0) ; )
+        {
+            size_t oneSeqSize;
+            nbSeq--;
+            ZSTD_decodeSequence(&sequence, &seqState);
+            oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
+            if (ZSTDv01_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* check if reached exact end */
+        if ( !FSE_endOfDStream(&(seqState.DStream)) ) return ERROR(corruption_detected);   /* requested too much : data is corrupted */
+        if (nbSeq<0) return ERROR(corruption_detected);   /* requested too many sequences : data is corrupted */
+
+        /* last literal segment */
+        {
+            size_t lastLLSize = litEnd - litPtr;
+            if (op+lastLLSize > oend) return ERROR(dstSize_tooSmall);
+            if (lastLLSize > 0) {
+                if (op != litPtr) memmove(op, litPtr, lastLLSize);
+                op += lastLLSize;
+            }
+        }
+    }
+
+    return op-ostart;
+}
+
+
+static size_t ZSTD_decompressBlock(
+                            void* ctx,
+                            void* dst, size_t maxDstSize,
+                            const void* src, size_t srcSize)
+{
+    /* blockType == blockCompressed, srcSize is trusted */
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* litPtr = NULL;
+    size_t litSize = 0;
+    size_t errorCode;
+
+    /* Decode literals sub-block */
+    errorCode = ZSTDv01_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
+    if (ZSTDv01_isError(errorCode)) return errorCode;
+    ip += errorCode;
+    srcSize -= errorCode;
+
+    return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
+}
+
+
+size_t ZSTDv01_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    const BYTE* ip = (const BYTE*)src;
+    const BYTE* iend = ip + srcSize;
+    BYTE* const ostart = (BYTE* const)dst;
+    BYTE* op = ostart;
+    BYTE* const oend = ostart + maxDstSize;
+    size_t remainingSize = srcSize;
+    U32
magicNumber; + size_t errorCode=0; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return ERROR(srcSize_wrong); + magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown); + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t blockSize = ZSTDv01_getcBlockSize(ip, iend-ip, &blockProperties); + if (ZSTDv01_isError(blockSize)) return blockSize; + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (blockSize > remainingSize) return ERROR(srcSize_wrong); + + switch(blockProperties.blockType) + { + case bt_compressed: + errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize); + break; + case bt_raw : + errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize); + break; + case bt_rle : + return ERROR(GENERIC); /* not yet supported */ + break; + case bt_end : + /* end of frame */ + if (remainingSize) return ERROR(srcSize_wrong); + break; + default: + return ERROR(GENERIC); + } + if (blockSize == 0) break; /* bt_end */ + + if (ZSTDv01_isError(errorCode)) return errorCode; + op += errorCode; + ip += blockSize; + remainingSize -= blockSize; + } + + return op-ostart; +} + +size_t ZSTDv01_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + dctx_t ctx; + ctx.base = dst; + return ZSTDv01_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize); +} + +/* ZSTD_errorFrameSizeInfoLegacy() : + assumes `cSize` and `dBound` are _not_ NULL */ +static void ZSTD_errorFrameSizeInfoLegacy(size_t* cSize, unsigned long long* dBound, size_t ret) +{ + *cSize = ret; + *dBound = ZSTD_CONTENTSIZE_ERROR; +} + +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, size_t* cSize, unsigned long long* dBound) +{ + const BYTE* ip = (const BYTE*)src; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + U32 magicNumber; + blockProperties_t blockProperties; + + /* Frame Header */ + if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } + magicNumber = ZSTD_readBE32(src); + if (magicNumber != ZSTD_magicNumber) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(prefix_unknown)); + return; + } + ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize; + + /* Loop on each block */ + while (1) + { + size_t blockSize = ZSTDv01_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTDv01_isError(blockSize)) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, blockSize); + return; + } + + ip += ZSTD_blockHeaderSize; + remainingSize -= ZSTD_blockHeaderSize; + if (blockSize > remainingSize) { + ZSTD_errorFrameSizeInfoLegacy(cSize, dBound, ERROR(srcSize_wrong)); + return; + } + + if (blockSize == 0) break; /* bt_end */ + + ip += blockSize; + remainingSize -= blockSize; + nbBlocks++; + } + + *cSize = ip - (const BYTE*)src; + *dBound = nbBlocks * BLOCKSIZE; +} + +/******************************* +* Streaming Decompression API +*******************************/ + +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx) +{ + dctx->expected = ZSTD_frameHeaderSize; + dctx->phase = 0; + dctx->previousDstEnd = NULL; + dctx->base = NULL; + return 0; +} + +ZSTDv01_Dctx* ZSTDv01_createDCtx(void) +{ + ZSTDv01_Dctx* dctx = (ZSTDv01_Dctx*)malloc(sizeof(ZSTDv01_Dctx)); + if (dctx==NULL) return NULL; + ZSTDv01_resetDCtx(dctx); + return dctx; +} + +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx) +{ + 
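/* note (editorial) : free(NULL) is a no-op per the C standard, so the result of a failed ZSTDv01_createDCtx() may safely be passed here */
+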
free(dctx);
+    return 0;
+}
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx)
+{
+    return ((dctx_t*)dctx)->expected;
+}
+
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
+{
+    dctx_t* ctx = (dctx_t*)dctx;
+
+    /* Sanity check */
+    if (srcSize != ctx->expected) return ERROR(srcSize_wrong);
+    if (dst != ctx->previousDstEnd)   /* not contiguous */
+        ctx->base = dst;
+
+    /* Decompress : frame header */
+    if (ctx->phase == 0)
+    {
+        /* Check frame magic header */
+        U32 magicNumber = ZSTD_readBE32(src);
+        if (magicNumber != ZSTD_magicNumber) return ERROR(prefix_unknown);
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        return 0;
+    }
+
+    /* Decompress : block header */
+    if (ctx->phase == 1)
+    {
+        blockProperties_t bp;
+        size_t blockSize = ZSTDv01_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
+        if (ZSTDv01_isError(blockSize)) return blockSize;
+        if (bp.blockType == bt_end)
+        {
+            ctx->expected = 0;
+            ctx->phase = 0;
+        }
+        else
+        {
+            ctx->expected = blockSize;
+            ctx->bType = bp.blockType;
+            ctx->phase = 2;
+        }
+
+        return 0;
+    }
+
+    /* Decompress : block content */
+    {
+        size_t rSize;
+        switch(ctx->bType)
+        {
+        case bt_compressed:
+            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
+            break;
+        case bt_raw :
+            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
+            break;
+        case bt_rle :
+            return ERROR(GENERIC);   /* not yet handled */
+            break;
+        case bt_end :   /* should never happen (filtered at phase 1) */
+            rSize = 0;
+            break;
+        default:
+            return ERROR(GENERIC);
+        }
+        ctx->phase = 1;
+        ctx->expected = ZSTD_blockHeaderSize;
+        if (ZSTDv01_isError(rSize)) return rSize;
+        ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
+        return rSize;
+    }
+
+}
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.h b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.h
new file mode 100644
index 0000000..6ac8769
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v01.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_V01_H_28739879432
+#define ZSTD_V01_H_28739879432
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Simple one-step function
+***************************************/
+/**
+ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format
+    compressedSize : is the exact source size
+    maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated.
+                      It must be equal or larger than originalSize, otherwise decompression will fail.
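+
+    Example (editorial sketch -- the destination size is illustrative; real callers
+    should obtain the original size from out-of-band metadata) :
+        unsigned char dst[64*1024];
+        size_t const r = ZSTDv01_decompress(dst, sizeof(dst), src, srcSize);
+        if (ZSTDv01_isError(r)) { handle corrupt or truncated input }
+        else { the first r bytes of dst hold the regenerated data }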
+    return : the number of bytes decompressed into destination buffer (originalSize)
+             or an errorCode if it fails (which can be tested using ZSTDv01_isError())
+*/
+size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize,
+                     const void* src, size_t compressedSize);
+
+ /**
+ ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format
+     srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src'
+     cSize (output parameter)  : the number of bytes that would be read to decompress this frame
+                                 or an error code if it fails (which can be tested using ZSTDv01_isError())
+     dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame
+                                 or ZSTD_CONTENTSIZE_ERROR if an error occurs
+
+    note : assumes `cSize` and `dBound` are _not_ NULL.
+ */
+void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize,
+                                     size_t* cSize, unsigned long long* dBound);
+
+/**
+ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error
+*/
+unsigned ZSTDv01_isError(size_t code);
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx;
+ZSTDv01_Dctx* ZSTDv01_createDCtx(void);
+size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_decompressDCtx(void* ctx,
+                              void* dst, size_t maxOriginalSize,
+                        const void* src, size_t compressedSize);
+
+/* *************************************
+*  Streaming functions
+***************************************/
+size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx);
+
+size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx);
+size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
+/**
+  Use the above functions alternately.
+  ZSTDv01_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTDv01_decompressContinue().
+  ZSTDv01_decompressContinue() will use previous data blocks to improve decompression if they are located prior to the current block.
+  The result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTDv01_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTDv01_magicNumber   0xFD2FB51E   /* Big Endian version */
+#define ZSTDv01_magicNumberLE 0x1EB52FFD   /* Little Endian version */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* ZSTD_V01_H_28739879432 */
diff --git a/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v02.c b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v02.c
new file mode 100644
index 0000000..6d39b6e
--- /dev/null
+++ b/c-blosc/internal-complibs/zstd-1.5.6/legacy/zstd_v02.c
@@ -0,0 +1,3465 @@
+/*
+ * Copyright (c) Yann Collet, Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include "zstd_v02.h"
+#include "../common/compiler.h"
+#include "../common/error_private.h"
+
+
+/******************************************
+*  Compiler-specific
+******************************************/
+#if defined(_MSC_VER)   /* Visual Studio */
+#   include <stdlib.h>  /* _byteswap_ulong */
+#   include <intrin.h>  /* _byteswap_* */
+#endif
+
+
+/* ******************************************************************
+   mem.h
+   low-level memory access routines
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#ifndef MEM_H_MODULE
+#define MEM_H_MODULE
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Includes
+******************************************/
+#include <stddef.h>    /* size_t, ptrdiff_t */
+#include <string.h>    /* memcpy */
+
+
+/****************************************************************
+*  Basic Types
+*****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)   /* C99 */)
+# if defined(_AIX)
+#  include <inttypes.h>
+# else
+#  include <stdint.h>   /* intptr_t */
+# endif
+  typedef uint8_t  BYTE;
+  typedef uint16_t U16;
+  typedef int16_t  S16;
+  typedef uint32_t U32;
+  typedef int32_t  S32;
+  typedef uint64_t U64;
+  typedef int64_t  S64;
+#else
+  typedef unsigned char  BYTE;
+  typedef unsigned short U16;
+  typedef signed short   S16;
+  typedef unsigned int   U32;
+  typedef signed int     S32;
+  typedef unsigned long long U64;
+  typedef signed long long   S64;
+#endif
+
+
+/****************************************************************
+*  Memory I/O
+*****************************************************************/
+
+MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
+MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }
+
+MEM_STATIC unsigned MEM_isLittleEndian(void)
+{
+    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
+    return one.c[0];
+}
+
+MEM_STATIC U16 MEM_read16(const void* memPtr)
+{
+    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U32 MEM_read32(const void* memPtr)
+{
+    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC U64 MEM_read64(const void* memPtr)
+{
+    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
+}
+
+MEM_STATIC void MEM_write16(void* memPtr, U16 value)
+{
+    memcpy(memPtr, &value, sizeof(value));
+}
+
+MEM_STATIC U16 MEM_readLE16(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read16(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U16)(p[0] + (p[1]<<8));
+    }
+}
+
+MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
+{
+    if (MEM_isLittleEndian())
+    {
+        MEM_write16(memPtr, val);
+    }
+    else
+    {
+        BYTE* p = (BYTE*)memPtr;
+        p[0] = (BYTE)val;
+        p[1] = (BYTE)(val>>8);
+    }
+}
+
+MEM_STATIC U32 MEM_readLE24(const void* memPtr)
+{
+    return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
+}
+
+MEM_STATIC U32 MEM_readLE32(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read32(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
+    }
+}
+
+
+MEM_STATIC U64 MEM_readLE64(const void* memPtr)
+{
+    if (MEM_isLittleEndian())
+        return MEM_read64(memPtr);
+    else
+    {
+        const BYTE* p = (const BYTE*)memPtr;
+        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
+                   + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
+    }
+}
+
+
+MEM_STATIC size_t MEM_readLEST(const void* memPtr)
+{
+    if (MEM_32bits())
+        return (size_t)MEM_readLE32(memPtr);
+    else
+        return (size_t)MEM_readLE64(memPtr);
+}
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* MEM_H_MODULE */
+
+
+/* ******************************************************************
+   bitstream
+ Part of NewGen Entropy library + header file (to include) + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* +* This API consists of small unitary functions, which highly benefit from being inlined. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + + +/********************************************** +* bitStream decompression API (read backward) +**********************************************/ +typedef struct +{ + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/****************************************** +* unsafe API +******************************************/ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/**************************************************************** +* Helper functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ +# if defined(_MSC_VER) /* Visual */ + unsigned long r; + return _BitScanReverse(&r, val) ? 
(unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + unsigned r; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + r = DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; + return r; +# endif +} + + + +/********************************************************** +* bitStream decoding +**********************************************************/ + +/*!BIT_initDStream +* Initialize a BIT_DStream_t. +* @bitD : a pointer to an already allocated BIT_DStream_t structure +* @srcBuffer must point at the beginning of a bitStream +* @srcSize must be the exact size of the bitStream +* @result : size of stream (== srcSize) or an errorCode if a problem is detected +*/ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + if (srcSize >= sizeof(size_t)) /* normal case */ + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(size_t); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + } + else + { + U32 contain32; + bitD->start = (const char*)srcBuffer; + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[6]) << (sizeof(size_t)*8 - 16); + /* fallthrough */ + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[5]) << (sizeof(size_t)*8 - 24); + /* fallthrough */ + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[4]) << (sizeof(size_t)*8 - 32); + /* fallthrough */ + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[3]) << 24; + /* fallthrough */ + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[2]) << 16; + /* fallthrough */ + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(bitD->start))[1]) << 8; + /* fallthrough */ + default:; + } + contain32 = ((const BYTE*)srcBuffer)[srcSize-1]; + if (contain32 == 0) return ERROR(GENERIC); /* endMark not present */ + bitD->bitsConsumed = 8 - BIT_highbit32(contain32); + bitD->bitsConsumed += (U32)(sizeof(size_t) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC size_t BIT_lookBits(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask); +} + +/*! 
BIT_lookBitsFast : +* unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + const U32 bitMask = sizeof(bitD->bitContainer)*8 - 1; + return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask); +} + +MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*!BIT_readBitsFast : +* unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits) +{ + size_t value = BIT_lookBitsFast(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should never happen */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) + { + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; + } + if (bitD->ptr == bitD->start) + { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + { + U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) + { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ + return result; + } +} + +/*! BIT_endOfDStream +* @return Tells if DStream has reached its exact end +*/ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/* ****************************************************************** + Error codes and messages + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + - Public forum : https://groups.google.com/forum/#!forum/lz4c +****************************************************************** */ +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/****************************************** +* Compiler-specific +******************************************/ +#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#elif defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/****************************************** +* Error Management +******************************************/ +#define PREFIX(name) ZSTD_error_##name + +#define ERROR(name) (size_t)-PREFIX(name) + +#define ERROR_LIST(ITEM) \ + ITEM(PREFIX(No_Error)) ITEM(PREFIX(GENERIC)) \ + ITEM(PREFIX(dstSize_tooSmall)) ITEM(PREFIX(srcSize_wrong)) \ + ITEM(PREFIX(prefix_unknown)) ITEM(PREFIX(corruption_detected)) \ + ITEM(PREFIX(tableLog_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooLarge)) ITEM(PREFIX(maxSymbolValue_tooSmall)) \ + ITEM(PREFIX(maxCode)) + +#define ERROR_GENERATE_ENUM(ENUM) ENUM, +typedef enum { ERROR_LIST(ERROR_GENERATE_ENUM) } ERR_codes; /* enum is exposed, to detect & handle specific errors; compare function result to -enum value */ + +#define ERROR_CONVERTTOSTRING(STRING) #STRING, +#define ERROR_GENERATE_STRING(EXPR) ERROR_CONVERTTOSTRING(EXPR) +static const char* ERR_strings[] = { ERROR_LIST(ERROR_GENERATE_STRING) }; + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + static const char* codeError = "Unspecified error code"; + if (ERR_isError(code)) return ERR_strings[-(int)(code)]; + return codeError; +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/* +Constructor and Destructor of type FSE_CTable + Note that its size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +typedef unsigned FSE_DTable; /* don't allocate that. 
It's just a way to be more restrictive than void* */
+
+
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   header file for static linking (only)
+   Copyright (C) 2013-2015, Yann Collet
+
+   BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/******************************************
+*  Static allocation
+******************************************/
+/* FSE buffer bounds */
+#define FSE_NCOUNTBOUND 512
+#define FSE_BLOCKBOUND(size) (size + (size>>7))
+#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
+#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
+#define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
+
+
+/******************************************
+*  FSE advanced API
+******************************************/
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
+/* create a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
+/* create a fake FSE_DTable, designed to always generate the same symbolValue */
+
+
+/******************************************
+*  FSE symbol decompression API
+******************************************/
+typedef struct
+{
+    size_t      state;
+    const void* table;   /* precise table may vary, depending on U16 */
+} FSE_DState_t;
+
+static void     FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
+
+static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+
+static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
+
+
+/******************************************
+*  FSE unsafe API
+******************************************/
+static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
+/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
+
+
+/******************************************
+*  Implementation of inline functions
+******************************************/
+
+/* decompression */
+
+typedef struct {
+    U16 tableLog;
+    U16 fastMode;
+} FSE_DTableHeader;   /* sizeof U32 */
+
+typedef struct
+{
+    unsigned short newState;
+    unsigned char  symbol;
+    unsigned char  nbBits;
+} FSE_decode_t;   /* size == U32 */
+
+MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
+{
+    FSE_DTableHeader DTableH;
+    memcpy(&DTableH, dt, sizeof(DTableH));
+    DStatePtr->state = BIT_readBits(bitD, DTableH.tableLog);
+    BIT_reloadDStream(bitD);
+    DStatePtr->table = dt + 1;
+}
+
+MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
+{
+    const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
+    const U32  nbBits = DInfo.nbBits;
+    BYTE symbol = DInfo.symbol;
+    size_t lowBits = BIT_readBits(bitD, nbBits);
+
+    DStatePtr->state = DInfo.newState + lowBits;
+    return symbol;
+}
+
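+/* Illustrative usage sketch (editorial addition, not part of the upstream
+   FSE sources): a minimal single-state decode loop over this API.  The names
+   `src`, `srcSize`, `out`, `outSize` and `dt` are hypothetical; real callers
+   interleave two states (see FSE_decompress_usingDTable_generic below) to
+   hide table-lookup latency.
+
+     BIT_DStream_t bitD;
+     FSE_DState_t  state;
+     size_t n = 0;
+     if (FSE_isError(BIT_initDStream(&bitD, src, srcSize))) return ERROR(GENERIC);
+     FSE_initDState(&state, &bitD, dt);       // reads tableLog bits of initial state
+     while (n < outSize)
+     {
+         if (BIT_reloadDStream(&bitD) > BIT_DStream_completed) break;  // overflow => corrupt input
+         out[n++] = FSE_decodeSymbol(&state, &bitD);                   // one symbol per state transition
+     }
+     // decoding ended cleanly iff BIT_endOfDStream(&bitD) && FSE_endOfDState(&state)
+*/
+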
+MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + const FSE_decode_t DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + const U32 nbBits = DInfo.nbBits; + BYTE symbol = DInfo.symbol; + size_t lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + +#if defined (__cplusplus) +} +#endif +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + header file for static linking (only) + Copyright (C) 2013-2015, Yann Collet + + BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+   You can contact the author at :
+   - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/******************************************
+*  Static allocation macros
+******************************************/
+/* Huff0 buffer bounds */
+#define HUF_CTABLEBOUND 129
+#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8)   /* only true if incompressible pre-filtered with fast heuristic */
+#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
+
+/* static allocation of Huff0's DTable */
+#define HUF_DTABLE_SIZE(maxTableLog)   (1 + (1<<maxTableLog))
+#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
+        unsigned short DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog)] = { maxTableLog }
+#define HUF_CREATE_STATIC_DTABLEX6(DTable, maxTableLog) \
+        unsigned int DTable[HUF_DTABLE_SIZE(maxTableLog) * 3 / 2] = { maxTableLog }
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+/*
+    zstd - standard compression library
+    Header File
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Includes
+***************************************/
+#include <stddef.h>   /* size_t */
+
+
+/* *************************************
+*  Version
+***************************************/
+#define ZSTD_VERSION_MAJOR    0    /* for breaking interface changes */
+#define ZSTD_VERSION_MINOR    2    /* for new (non-breaking) interface capabilities */
+#define ZSTD_VERSION_RELEASE  2    /* for tweaks, bug-fixes, or development */
+#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
+
+
+/* *************************************
+*  Advanced functions
+***************************************/
+typedef struct ZSTD_CCtx_s ZSTD_CCtx;   /* incomplete type */
+
+#if defined (__cplusplus)
+}
+#endif
+/*
+    zstd - standard compression library
+    Header File for static linking only
+    Copyright (C) 2014-2015, Yann Collet.
+
+    BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+    * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+    copyright notice, this list of conditions and the following disclaimer
+    in the documentation and/or other materials provided with the
+    distribution.
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+    You can contact the author at :
+    - zstd source repository : https://github.com/Cyan4973/zstd
+    - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
+*/
+
+/* The objects defined in this file should be considered experimental.
+ * They are not labelled stable, as their prototype may change in the future.
+ * You can use them for tests, provide feedback, or if you can endure the risk of future changes.
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+/* *************************************
+*  Streaming functions
+***************************************/
+
+typedef struct ZSTDv02_Dctx_s ZSTD_DCtx;
+
+/*
+  Use above functions alternatively.
+  ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue().
+  ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block.
+  Result is the number of bytes regenerated within 'dst'.
+  It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
+*/
+
+/* *************************************
+*  Prefix - version detection
+***************************************/
+#define ZSTD_magicNumber 0xFD2FB522   /* v0.2 (current) */
+
+
+#if defined (__cplusplus)
+}
+#endif
+/* ******************************************************************
+   FSE : Finite State Entropy coder
+   Copyright (C) 2013-2015, Yann Collet.
+
+   BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php)
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+
+       * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above
+   copyright notice, this list of conditions and the following disclaimer
+   in the documentation and/or other materials provided with the
+   distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   You can contact the author at :
+   - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+#ifndef FSE_COMMONDEFS_ONLY
+
+/****************************************************************
+*  Tuning parameters
+****************************************************************/
+/* MEMORY_USAGE :
+*  Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
+*  Increasing memory usage improves compression ratio
+*  Reduced memory usage can improve speed, due to cache effect
+*  Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
+#define FSE_MAX_MEMORY_USAGE 14
+#define FSE_DEFAULT_MEMORY_USAGE 13
+
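+/* Worked example (editorial addition, not part of the upstream sources):
+   with FSE_MAX_MEMORY_USAGE = 14, the largest decoding table spans
+   2^14 bytes = 16 KB, which fits comfortably in a 32 KB L1 data cache.
+   FSE_MAX_TABLELOG (defined below) becomes 14-2 = 12, i.e. at most
+   2^12 = 4096 4-byte FSE_decode_t cells plus a 4-byte header:
+
+     U32 dtable[FSE_DTABLE_SIZE_U32(12)];   // 1 + (1<<12) = 4097 U32s, 16388 bytes
+*/
+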
+/* FSE_MAX_SYMBOL_VALUE :
+* Maximum symbol value authorized.
+* Required for proper stack allocation */
+#define FSE_MAX_SYMBOL_VALUE 255
+
+
+/****************************************************************
+*  template functions type & suffix
+****************************************************************/
+#define FSE_FUNCTION_TYPE BYTE
+#define FSE_FUNCTION_EXTENSION
+
+
+/****************************************************************
+*  Byte symbol type
+****************************************************************/
+#endif   /* !FSE_COMMONDEFS_ONLY */
+
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#ifdef _MSC_VER    /* Visual Studio */
+#  define FORCE_INLINE static __forceinline
+#  include <intrin.h>    /* For Visual 2005 */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4214)   /* disable: C4214: non-int bitfields */
+#else
+#  if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
+#    ifdef __GNUC__
+#      define FORCE_INLINE static inline __attribute__((always_inline))
+#    else
+#      define FORCE_INLINE static inline
+#    endif
+#  else
+#    define FORCE_INLINE static
+#  endif /* __STDC_VERSION__ */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Constants
+*****************************************************************/
+#define FSE_MAX_TABLELOG  (FSE_MAX_MEMORY_USAGE-2)
+#define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
+#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
+#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
+#define FSE_MIN_TABLELOG 5
+
+#define FSE_TABLELOG_ABSOLUTE_MAX 15
+#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
+#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
+#endif
+
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/****************************************************************
+*  Complex types
+****************************************************************/
+typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
+
+
+/****************************************************************
+*  Templates
+****************************************************************/
+/*
+  designed to be included
+  for type-specific functions (template emulation in C)
+  Objective is to write these functions only once, for improved maintenance
+*/
+
+/* safety checks */
+#ifndef FSE_FUNCTION_EXTENSION
+#  error "FSE_FUNCTION_EXTENSION must be defined"
+#endif
+#ifndef FSE_FUNCTION_TYPE
+#  error "FSE_FUNCTION_TYPE must be defined"
+#endif
+
+/* Function names */
+#define FSE_CAT(X,Y) X##Y
+#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
+#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
+
+
+/* Function templates */
+
+#define FSE_DECODE_TYPE FSE_decode_t
+
+static U32 FSE_tableStep(U32 tableSize) { return (tableSize>>1) + (tableSize>>3) + 3; }
+
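+/* Editorial note on the step used by FSE_buildDTable below:
+   FSE_tableStep(tableSize) is (5/8)*tableSize + 3, which is odd for the
+   power-of-two table sizes used here and therefore coprime with tableSize;
+   stepping by it visits every cell exactly once before returning to 0.
+   E.g. for tableLog = 4:
+
+     tableSize = 16, step = 8 + 2 + 3 = 13;
+     position sequence: 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+*/
+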
+static size_t FSE_buildDTable
+(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
+{
+    void* ptr = dt+1;
+    FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*)ptr;
+    FSE_DTableHeader DTableH;
+    const U32 tableSize = 1 << tableLog;
+    const U32 tableMask = tableSize-1;
+    const U32 step = FSE_tableStep(tableSize);
+    U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
+    U32 position = 0;
+    U32 highThreshold = tableSize-1;
+    const S16 largeLimit= (S16)(1 << (tableLog-1));
+    U32 noLarge = 1;
+    U32 s;
+
+    /* Sanity Checks */
+    if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
+    if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
+
+    /* Init, lay down lowprob symbols */
+    DTableH.tableLog = (U16)tableLog;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        if (normalizedCounter[s]==-1)
+        {
+            tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
+            symbolNext[s] = 1;
+        }
+        else
+        {
+            if (normalizedCounter[s] >= largeLimit) noLarge=0;
+            symbolNext[s] = normalizedCounter[s];
+        }
+    }
+
+    /* Spread symbols */
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        int i;
+        for (i=0; i<normalizedCounter[s]; i++)
+        {
+            tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
+            position = (position + step) & tableMask;
+            while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
+        }
+    }
+
+    if (position!=0) return ERROR(GENERIC);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
+
+    /* Build Decoding table */
+    {
+        U32 i;
+        for (i=0; i<tableSize; i++)
+        {
+            FSE_FUNCTION_TYPE symbol = (FSE_FUNCTION_TYPE)(tableDecode[i].symbol);
+            U16 nextState = symbolNext[symbol]++;
+            tableDecode[i].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
+            tableDecode[i].newState = (U16) ( (nextState << tableDecode[i].nbBits) - tableSize);
+        }
+    }
+
+    DTableH.fastMode = (U16)noLarge;
+    memcpy(dt, &DTableH, sizeof(DTableH));
+    return 0;
+}
+
+
+#ifndef FSE_COMMONDEFS_ONLY
+/******************************************
+*  FSE helper functions
+******************************************/
+static unsigned FSE_isError(size_t code) { return ERR_isError(code); }
+
+
+/****************************************************************
+*  FSE NCount encoding-decoding
+****************************************************************/
+static short FSE_abs(short a)
+{
+    return a<0 ? -a : a;
+}
+
+static size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
+                 const void* headerBuffer, size_t hbSize)
+{
+    const BYTE* const istart = (const BYTE*) headerBuffer;
+    const BYTE* const iend = istart + hbSize;
+    const BYTE* ip = istart;
+    int nbBits;
+    int remaining;
+    int threshold;
+    U32 bitStream;
+    int bitCount;
+    unsigned charnum = 0;
+    int previous0 = 0;
+
+    if (hbSize < 4) return ERROR(srcSize_wrong);
+    bitStream = MEM_readLE32(ip);
+    nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG;   /* extract tableLog */
+    if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
+    bitStream >>= 4;
+    bitCount = 4;
+    *tableLogPtr = nbBits;
+    remaining = (1<<nbBits)+1;
+    threshold = 1<<nbBits;
+    nbBits++;
+
+    while ((remaining>1) && (charnum<=*maxSVPtr))
+    {
+        if (previous0)
+        {
+            unsigned n0 = charnum;
+            while ((bitStream & 0xFFFF) == 0xFFFF)
+            {
+                n0+=24;
+                if (ip < iend-5)
+                {
+                    ip+=2;
+                    bitStream = MEM_readLE32(ip) >> bitCount;
+                }
+                else
+                {
+                    bitStream >>= 16;
+                    bitCount+=16;
+                }
+            }
+            while ((bitStream & 3) == 3)
+            {
+                n0+=3;
+                bitStream>>=2;
+                bitCount+=2;
+            }
+            n0 += bitStream & 3;
+            bitCount += 2;
+            if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
+            while (charnum < n0) normalizedCounter[charnum++] = 0;
+            if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+            {
+                ip += bitCount>>3;
+                bitCount &= 7;
+                bitStream = MEM_readLE32(ip) >> bitCount;
+            }
+            else
+                bitStream >>= 2;
+        }
+        {
+            const short max = (short)((2*threshold-1)-remaining);
+            short count;
+
+            if ((bitStream & (threshold-1)) < (U32)max)
+            {
+                count = (short)(bitStream & (threshold-1));
+                bitCount += nbBits-1;
+            }
+            else
+            {
+                count = (short)(bitStream & (2*threshold-1));
+                if (count >= threshold) count -= max;
+                bitCount += nbBits;
+            }
+
+            count--;   /* extra accuracy */
+            remaining -= FSE_abs(count);
+            normalizedCounter[charnum++] = count;
+            previous0 = !count;
+            while (remaining < threshold)
+            {
+                nbBits--;
+                threshold >>= 1;
+            }
+
+            {
+                if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4))
+                {
+                    ip += bitCount>>3;
+                    bitCount &= 7;
+                }
+                else
+                {
+                    bitCount -= (int)(8 * (iend - 4 - ip));
+                    ip = iend - 4;
+                }
+                bitStream = MEM_readLE32(ip) >> (bitCount & 31);
+            }
+        }
+    }
+    if (remaining != 1) return ERROR(GENERIC);
+    *maxSVPtr = charnum-1;
+
+    ip += (bitCount+7)>>3;
+    if ((size_t)(ip-istart) > hbSize) return ERROR(srcSize_wrong);
+    return ip-istart;
+}
+
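+/* Worked example (editorial addition): FSE_readNCount above maintains the
+   invariant remaining = 1 + 2^tableLog - sum(|count|) decoded so far.  For
+   tableLog = 5 and counts {A:20, B:10, C:2}, remaining goes 33 -> 13 -> 3 -> 1,
+   and the final check `remaining == 1` confirms the distribution sums to
+   2^5 = 32.  The threshold halves whenever remaining drops below it, so
+   later symbols are coded with fewer bits. */
+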
+
+/*********************************************************
+*  Decompression (Byte symbols)
+*********************************************************/
+static size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const cell = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+
+    DTableH->tableLog = 0;
+    DTableH->fastMode = 0;
+
+    cell->newState = 0;
+    cell->symbol = symbolValue;
+    cell->nbBits = 0;
+
+    return 0;
+}
+
+
+static size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
+{
+    void* ptr = dt;
+    FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
+    FSE_decode_t* const dinfo = (FSE_decode_t*)(ptr) + 1;   /* because dt is unsigned */
+    const unsigned tableSize = 1 << nbBits;
+    const unsigned tableMask = tableSize - 1;
+    const unsigned maxSymbolValue = tableMask;
+    unsigned s;
+
+    /* Sanity checks */
+    if (nbBits < 1) return ERROR(GENERIC);   /* min size */
+
+    /* Build Decoding Table */
+    DTableH->tableLog = (U16)nbBits;
+    DTableH->fastMode = 1;
+    for (s=0; s<=maxSymbolValue; s++)
+    {
+        dinfo[s].newState = 0;
+        dinfo[s].symbol = (BYTE)s;
+        dinfo[s].nbBits = (BYTE)nbBits;
+    }
+
+    return 0;
+}
+
+FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
+          void* dst, size_t maxDstSize,
+    const void* cSrc, size_t cSrcSize,
+    const FSE_DTable* dt, const unsigned fast)
+{
+    BYTE* const ostart = (BYTE*) dst;
+    BYTE* op = ostart;
+    BYTE* const omax = op + maxDstSize;
+    BYTE* const olimit = omax-3;
+
+    BIT_DStream_t bitD;
+    FSE_DState_t state1;
+    FSE_DState_t state2;
+    size_t errorCode;
+
+    /* Init */
+    errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);   /* replaced last arg by maxCompressed Size */
+    if (FSE_isError(errorCode)) return errorCode;
+
+    FSE_initDState(&state1, &bitD, dt);
+    FSE_initDState(&state2, &bitD, dt);
+
+#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
+
+    /* 4 symbols per loop */
+    for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) && (op<olimit) ; op+=4)
+    {
+        op[0] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[1] = FSE_GETSYMBOL(&state2);
+
+        if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
+
+        op[2] = FSE_GETSYMBOL(&state1);
+
+        if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8)    /* This test must be static */
+            BIT_reloadDStream(&bitD);
+
+        op[3] = FSE_GETSYMBOL(&state2);
+    }
+
+    /* tail */
+    /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */
+    while (1)
+    {
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state1))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state1);
+
+        if ( (BIT_reloadDStream(&bitD)>BIT_DStream_completed) || (op==omax) || (BIT_endOfDStream(&bitD) && (fast || FSE_endOfDState(&state2))) )
+            break;
+
+        *op++ = FSE_GETSYMBOL(&state2);
+    }
+
+    /* end ?
*/ + if (BIT_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2)) + return op-ostart; + + if (op==omax) return ERROR(dstSize_tooSmall); /* dst buffer is full, but cSrc unfinished */ + + return ERROR(corruption_detected); +} + + +static size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + FSE_DTableHeader DTableH; + memcpy(&DTableH, dt, sizeof(DTableH)); + + /* select fast mode (static) */ + if (DTableH.fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +static size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + short counting[FSE_MAX_SYMBOL_VALUE+1]; + DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + size_t errorCode; + + if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ + + /* normal FSE decoding mode */ + errorCode = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); + if (FSE_isError(errorCode)) return errorCode; + if (errorCode >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ + ip += errorCode; + cSrcSize -= errorCode; + + errorCode = FSE_buildDTable (dt, counting, maxSymbolValue, tableLog); + if (FSE_isError(errorCode)) return errorCode; + + /* always return, even if it is an error code */ + return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); +} + + + +#endif /* FSE_COMMONDEFS_ONLY */ +/* ****************************************************************** + Huff0 : Huffman coder, part of New Generation Entropy library + Copyright (C) 2013-2015, Yann Collet. + + BSD 2-Clause License (https://opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+   You can contact the author at :
+   - FSE+Huff0 source repository : https://github.com/Cyan4973/FiniteStateEntropy
+   - Public forum : https://groups.google.com/forum/#!forum/lz4c
+****************************************************************** */
+
+/****************************************************************
+*  Compiler specifics
+****************************************************************/
+#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+/* inline is defined */
+#elif defined(_MSC_VER)
+#  define inline __inline
+#else
+#  define inline /* disable inline */
+#endif
+
+
+#ifdef _MSC_VER    /* Visual Studio */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
+#endif
+
+
+/****************************************************************
+*  Includes
+****************************************************************/
+#include <stdlib.h>     /* malloc, free, qsort */
+#include <string.h>     /* memcpy, memset */
+#include <stdio.h>      /* printf (debug) */
+
+/****************************************************************
+*  Error Management
+****************************************************************/
+#define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; }   /* use only *after* variable declarations */
+
+
+/******************************************
+*  Helper functions
+******************************************/
+static unsigned HUF_isError(size_t code) { return ERR_isError(code); }
+
+#define HUF_ABSOLUTEMAX_TABLELOG  16   /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_MAX_TABLELOG  12           /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_DEFAULT_TABLELOG  HUF_MAX_TABLELOG   /* tableLog by default, when not specified */
+#define HUF_MAX_SYMBOL_VALUE 255
+#if (HUF_MAX_TABLELOG > HUF_ABSOLUTEMAX_TABLELOG)
+#  error "HUF_MAX_TABLELOG is too large !"
+#endif
+
+
+
+/*********************************************************
+*  Huff0 : Huffman block decompression
+*********************************************************/
+typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2;   /* single-symbol decoding */
+
+typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4;   /* double-symbols decoding */
+
+typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+
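+/* Illustrative sketch (editorial addition): the HUF_CREATE_STATIC_DTABLE*
+   macros reserve cell 0 of a DTable for the table log, e.g.:
+
+     HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+     // expands to: unsigned short DTable[1 + (1<<12)] = { 12 };
+
+   DTable[0] starts as the allocated log; HUF_readDTableX2() below checks the
+   actual tableLog against it, then overwrites it.  A HUF_DEltX2 cell is
+   2 bytes (symbol + nbBits), so this table costs about 8 KB; HUF_DEltX4
+   cells are 4 bytes and can deliver two symbols per lookup. */
+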
+/*! HUF_readStats
+    Read compact Huffman tree, saved by HUF_writeCTable
+    @huffWeight : destination buffer
+    @return : size read from `src`
+*/
+static size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+                            U32* nbSymbolsPtr, U32* tableLogPtr,
+                            const void* src, size_t srcSize)
+{
+    U32 weightTotal;
+    U32 tableLog;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize;
+    size_t oSize;
+    U32 n;
+
+    if (!srcSize) return ERROR(srcSize_wrong);
+    iSize = ip[0];
+    //memset(huffWeight, 0, hwSize);   /* is not necessary, even though some analyzer complain ... */
+
+    if (iSize >= 128)  /* special header */
+    {
+        if (iSize >= (242))   /* RLE */
+        {
+            static int l[14] = { 1, 2, 3, 4, 7, 8, 15, 16, 31, 32, 63, 64, 127, 128 };
+            oSize = l[iSize-242];
+            memset(huffWeight, 1, hwSize);
+            iSize = 0;
+        }
+        else   /* Incompressible */
+        {
+            oSize = iSize - 127;
+            iSize = ((oSize+1)/2);
+            if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+            if (oSize >= hwSize) return ERROR(corruption_detected);
+            ip += 1;
+            for (n=0; n<oSize; n+=2)
+            {
+                huffWeight[n]   = ip[n/2] >> 4;
+                huffWeight[n+1] = ip[n/2] & 15;
+            }
+        }
+    }
+    else  /* header compressed with FSE (normal case) */
+    {
+        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
+        oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+        if (FSE_isError(oSize)) return oSize;
+    }
+
+    /* collect weight stats */
+    memset(rankStats, 0, (HUF_ABSOLUTEMAX_TABLELOG + 1) * sizeof(U32));
+    weightTotal = 0;
+    for (n=0; n<oSize; n++)
+    {
+        if (huffWeight[n] >= HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+        rankStats[huffWeight[n]]++;
+        weightTotal += (1 << huffWeight[n]) >> 1;
+    }
+    if (weightTotal == 0) return ERROR(corruption_detected);
+
+    /* get last non-null symbol weight (implied, total must be 2^n) */
+    tableLog = BIT_highbit32(weightTotal) + 1;
+    if (tableLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(corruption_detected);
+    {
+        U32 total = 1 << tableLog;
+        U32 rest = total - weightTotal;
+        U32 verif = 1 << BIT_highbit32(rest);
+        U32 lastWeight = BIT_highbit32(rest) + 1;
+        if (verif != rest) return ERROR(corruption_detected);    /* last value must be a clean power of 2 */
+        huffWeight[oSize] = (BYTE)lastWeight;
+        rankStats[lastWeight]++;
+    }
+
+    /* check tree construction validity */
+    if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected);   /* by construction : at least 2 elts of rank 1, must be even */
+
+    /* results */
+    *nbSymbolsPtr = (U32)(oSize+1);
+    *tableLogPtr = tableLog;
+    return iSize+1;
+}
+
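+/* Worked example (editorial addition) for the implied-weight logic above:
+   suppose the header stores weights {2, 2, 1, 1} for symbols 0..3.  Each
+   weight w contributes (1<<w)>>1, so weightTotal = 2+2+1+1 = 6, hence
+   tableLog = highbit32(6)+1 = 3, total = 8, rest = 2; rest is a clean power
+   of 2, so the implied last symbol gets lastWeight = highbit32(2)+1 = 2.
+   rankStats[1] ends at 2 (even, >= 2), so the tree is accepted with
+   nbSymbols = 5. */
+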
+
+/**************************/
+/* single-symbol decoding */
+/**************************/
+
+static size_t HUF_readDTableX2 (U16* DTable, const void* src, size_t srcSize)
+{
+    BYTE huffWeight[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];   /* large enough for values from 0 to 16 */
+    U32 tableLog = 0;
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    U32 nbSymbols = 0;
+    U32 n;
+    U32 nextRankStart;
+    void* ptr = DTable+1;
+    HUF_DEltX2* const dt = (HUF_DEltX2*)ptr;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U16));   /* if compilation fails here, assertion is false */
+    //memset(huffWeight, 0, sizeof(huffWeight));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(huffWeight, HUF_MAX_SYMBOL_VALUE + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > DTable[0]) return ERROR(tableLog_tooLarge);   /* DTable is too small */
+    DTable[0] = (U16)tableLog;   /* maybe should separate sizeof DTable, as allocated, from used size of DTable, in case of DTable re-use */
+
+    /* Prepare ranks */
+    nextRankStart = 0;
+    for (n=1; n<=tableLog; n++)
+    {
+        U32 current = nextRankStart;
+        nextRankStart += (rankVal[n] << (n-1));
+        rankVal[n] = current;
+    }
+
+    /* fill DTable */
+    for (n=0; n<nbSymbols; n++)
+    {
+        const U32 w = huffWeight[n];
+        const U32 length = (1 << w) >> 1;
+        U32 i;
+        HUF_DEltX2 D;
+        D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
+        for (i = rankVal[w]; i < rankVal[w] + length; i++)
+            dt[i] = D;
+        rankVal[w] += length;
+    }
+
+    return iSize;
+}
+
+static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(Dstream, dtLog);   /* note : dtLog >= 1 */
+    const BYTE c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
+
+#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
+    if (MEM_64bits()) \
+        HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
+
+static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
+{
+    BYTE* const pStart = p;
+
+    /* up to 4 symbols at a time */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4))
+    {
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    }
+
+    /* closer to the end */
+    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    /* no more data to retrieve from bitstream, hence no need to reload */
+    while (p < pEnd)
+        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+
+    return pEnd-pStart;
+}
+
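+/* Worked example (editorial addition) for the table fill in HUF_readDTableX2
+   above: a symbol with weight w occupies (1<<w)>>1 consecutive cells and is
+   decoded with nbBits = tableLog + 1 - w.  Continuing the {2,2,1,1, implied 2}
+   example with tableLog = 3: the three weight-2 symbols fill 2 cells each
+   (read with 2 bits), the two weight-1 symbols fill 1 cell each (3 bits),
+   covering all 8 cells exactly. */
+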
+static size_t HUF_decompress4X2_usingDTable(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const U16* DTable)
+{
+    if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+
+    {
+        const BYTE* const istart = (const BYTE*) cSrc;
+        BYTE* const ostart = (BYTE*) dst;
+        BYTE* const oend = ostart + dstSize;
+
+        const void* ptr = DTable;
+        const HUF_DEltX2* const dt = ((const HUF_DEltX2*)ptr) +1;
+        const U32 dtLog = DTable[0];
+        size_t errorCode;
+
+        /* Init */
+        BIT_DStream_t bitD1;
+        BIT_DStream_t bitD2;
+        BIT_DStream_t bitD3;
+        BIT_DStream_t bitD4;
+        const size_t length1 = MEM_readLE16(istart);
+        const size_t length2 = MEM_readLE16(istart+2);
+        const size_t length3 = MEM_readLE16(istart+4);
+        size_t length4;
+        const BYTE* const istart1 = istart + 6;   /* jumpTable */
+        const BYTE* const istart2 = istart1 + length1;
+        const BYTE* const istart3 = istart2 + length2;
+        const BYTE* const istart4 = istart3 + length3;
+        const size_t segmentSize = (dstSize+3) / 4;
+        BYTE* const opStart2 = ostart + segmentSize;
+        BYTE* const opStart3 = opStart2 + segmentSize;
+        BYTE* const opStart4 = opStart3 + segmentSize;
+        BYTE* op1 = ostart;
+        BYTE* op2 = opStart2;
+        BYTE* op3 = opStart3;
+        BYTE* op4 = opStart4;
+        U32 endSignal;
+
+        length4 = cSrcSize - (length1 + length2 + length3 + 6);
+        if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        errorCode = BIT_initDStream(&bitD1, istart1, length1);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD2, istart2, length2);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD3, istart3, length3);
+        if (HUF_isError(errorCode)) return errorCode;
+        errorCode = BIT_initDStream(&bitD4, istart4, length4);
+        if (HUF_isError(errorCode)) return errorCode;
+
+        /* 16-32 symbols per loop (4-8 symbols per stream) */
+        endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; )
+        {
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+
+            endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
+        }
+
+        /* check corruption */
+        if (op1 > opStart2) return ERROR(corruption_detected);
+        if (op2 > opStart3) return ERROR(corruption_detected);
+        if (op3 > opStart4) return ERROR(corruption_detected);
+        /* note : op4 supposed already verified within main loop */
+
+        /* finish bitStreams one by one */
+        HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
+        HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
+        HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
+        HUF_decodeStreamX2(op4, &bitD4, oend,     dt, dtLog);
+
+        /* check */
+        endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
+        if (!endSignal) return ERROR(corruption_detected);
+
+        /* decoded size */
+        return dstSize;
+    }
+}
+
+
+static size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
+{
+    HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_MAX_TABLELOG);
+    const BYTE* ip = (const BYTE*) cSrc;
+    size_t errorCode;
+
+    errorCode = HUF_readDTableX2 (DTable, cSrc, cSrcSize);
+    if (HUF_isError(errorCode)) return errorCode;
+    if (errorCode >= cSrcSize) return ERROR(srcSize_wrong);
+    ip += errorCode;
+    cSrcSize -= errorCode;
+
+    return HUF_decompress4X2_usingDTable (dst, dstSize, ip, cSrcSize, DTable);
+}
+
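+/* Illustrative sketch (editorial addition) of the 4-stream framing consumed
+   by HUF_decompress4X2() above.  The compressed block starts with a 6-byte
+   jump table holding the sizes of streams 1-3 (stream 4 runs to the end of
+   the block), and each stream regenerates one quarter of the output:
+
+     +--------+--------+--------+---------+---------+---------+---------+
+     | size1  | size2  | size3  | stream1 | stream2 | stream3 | stream4 |
+     | 2 B LE | 2 B LE | 2 B LE | size1 B | size2 B | size3 B |  rest   |
+     +--------+--------+--------+---------+---------+---------+---------+
+
+   segmentSize = (dstSize+3)/4, and the op1..op3 > opStart2..4 checks reject
+   any stream that overflows its output segment. */
+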
+
+/***************************/
+/* double-symbols decoding */
+/***************************/
+
+static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
+                           const U32* rankValOrigin, const int minWeight,
+                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    HUF_DEltX4 DElt;
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    U32 s;
+
+    /* get pre-calculated rankVal */
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill skipped values */
+    if (minWeight>1)
+    {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits = (BYTE)(consumed);
+        DElt.length = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits = (BYTE)(nbBits + consumed);
+        DElt.length = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
+
+        rankVal[weight] += length;
+    }
+}
+
+typedef U32 rankVal_t[HUF_ABSOLUTEMAX_TABLELOG][HUF_ABSOLUTEMAX_TABLELOG + 1];
+
+static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline)
+{
+    U32 rankVal[HUF_ABSOLUTEMAX_TABLELOG + 1];
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits = nbBitsBaseline - maxWeight;
+    U32 s;
+
+    memcpy(rankVal, rankValOrigin, sizeof(rankVal));
+
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++)
+    {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+
+        if (targetLog-nbBits >= minBits)   /* enough room for a second symbol */
+        {
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol);
+        }
+        else
+        {
+            U32 i;
+            const U32 end = start + length;
+            HUF_DEltX4 DElt;
+
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            for (i = start; i < end; i++)
+                DTable[i] = DElt;
+        }
+        rankVal[weight] += length;
+    }
+}
+
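+/* Editorial note: HUF_DEltX4.sequence packs up to two decoded symbols in
+   output order.  HUF_fillDTableX4Level2() stores
+   MEM_writeLE16(&DElt.sequence, baseSeq + (second << 8)), so byte 0 holds
+   the first symbol and byte 1 the second; HUF_decodeSymbolX4() below emits
+   both with a single 2-byte memcpy and advances by DElt.length (1 or 2). */
+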
+static size_t HUF_readDTableX4 (U32* DTable, const void* src, size_t srcSize)
+{
+    BYTE weightList[HUF_MAX_SYMBOL_VALUE + 1];
+    sortedSymbol_t sortedSymbol[HUF_MAX_SYMBOL_VALUE + 1];
+    U32 rankStats[HUF_ABSOLUTEMAX_TABLELOG + 1] = { 0 };
+    U32 rankStart0[HUF_ABSOLUTEMAX_TABLELOG + 2] = { 0 };
+    U32* const rankStart = rankStart0+1;
+    rankVal_t rankVal;
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    const U32 memLog = DTable[0];
+    const BYTE* ip = (const BYTE*) src;
+    size_t iSize = ip[0];
+    void* ptr = DTable;
+    HUF_DEltX4* const dt = ((HUF_DEltX4*)ptr) + 1;
+
+    HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(U32));   /* if compilation fails here, assertion is false */
+    if (memLog > HUF_ABSOLUTEMAX_TABLELOG) return ERROR(tableLog_tooLarge);
+    //memset(weightList, 0, sizeof(weightList));   /* is not necessary, even though some analyzer complain ... */
+
+    iSize = HUF_readStats(weightList, HUF_MAX_SYMBOL_VALUE + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
+    if (HUF_isError(iSize)) return iSize;
+
+    /* check result */
+    if (tableLog > memLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+
+    /* find maxWeight */
+    for (maxW = tableLog; rankStats[maxW]==0; maxW--)
+        { if (!maxW) return ERROR(GENERIC); }   /* necessarily finds a solution before maxW==0 */
+
+    /* Get start index of each weight */
+    {
+        U32 w, nextRankStart = 0;
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankStart;
+            nextRankStart += rankStats[w];
+            rankStart[w] = current;
+        }
+        rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list */
+        sizeOfSort = nextRankStart;
+    }
+
+    /* sort symbols by weight */
+    {
+        U32 s;
+        for (s=0; s<nbSymbols; s++)
+        {
+            U32 w = weightList[s];
+            U32 r = rankStart[w]++;
+            sortedSymbol[r].symbol = (BYTE)s;
+            sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+
+    /* Build rankVal */
+    {
+        const U32 minBits = tableLog+1 - maxW;
+        U32 nextRankVal = 0;
+        U32 w, consumed;
+        const int rescale = (memLog-tableLog) - 1;   /* tableLog <= memLog */
+        U32* rankVal0 = rankVal[0];
+        for (w=1; w<=maxW; w++)
+        {
+            U32 current = nextRankVal;
+            nextRankVal += rankStats[w] << (w+rescale);
+            rankVal0[w] = current;
+        }
+        for (consumed = minBits; consumed <= memLog - minBits; consumed++)
+        {
+            U32* rankValPtr = rankVal[consumed];
+            U32 w;
+            for (w = 1; w <= maxW; w++)
+            {
+                rankValPtr[w] = rankVal0[w] >> consumed;
+            }
+        }
+    }
+
+    HUF_fillDTableX4(dt, memLog,
+                   sortedSymbol, sizeOfSort,
+                   rankStart0, rankVal, maxW,
+                   tableLog+1);
+
+    return iSize;
+}
+
+
+static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
+
+static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
+{
+    const size_t val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else
+    {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8))
+        {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);   /* ugly hack; works only because it's the last symbol.
Note : can't easily extract nbBits from just this symbol */ + } + } + return 1; +} + + +#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_MAX_TABLELOG<=12)) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) + +static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd-7)) + { + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_1(p, bitDPtr); + HUF_DECODE_SYMBOLX4_2(p, bitDPtr); + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + } + + /* closer to the end */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-2)) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + + + +static size_t HUF_decompress4X4_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const U32* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { + const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + + const void* ptr = DTable; + const HUF_DEltX4* const dt = ((const HUF_DEltX4*)ptr) +1; + const U32 dtLog = DTable[0]; + size_t errorCode; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + const size_t length1 = MEM_readLE16(istart); + const size_t length2 = MEM_readLE16(istart+2); + const size_t length3 = MEM_readLE16(istart+4); + size_t length4; + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal; + + length4 = cSrcSize - (length1 + length2 + length3 + 6); + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + errorCode = BIT_initDStream(&bitD1, istart1, length1); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD2, istart2, length2); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD3, istart3, length3); + if (HUF_isError(errorCode)) return errorCode; + errorCode = BIT_initDStream(&bitD4, istart4, length4); + if (HUF_isError(errorCode)) return errorCode; + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) + { + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + 
HUF_DECODE_SYMBOLX4_1(op1, &bitD1); + HUF_DECODE_SYMBOLX4_1(op2, &bitD2); + HUF_DECODE_SYMBOLX4_1(op3, &bitD3); + HUF_DECODE_SYMBOLX4_1(op4, &bitD4); + HUF_DECODE_SYMBOLX4_2(op1, &bitD1); + HUF_DECODE_SYMBOLX4_2(op2, &bitD2); + HUF_DECODE_SYMBOLX4_2(op3, &bitD3); + HUF_DECODE_SYMBOLX4_2(op4, &bitD4); + HUF_DECODE_SYMBOLX4_0(op1, &bitD1); + HUF_DECODE_SYMBOLX4_0(op2, &bitD2); + HUF_DECODE_SYMBOLX4_0(op3, &bitD3); + HUF_DECODE_SYMBOLX4_0(op4, &bitD4); + + endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); + + /* check */ + endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endSignal) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; + } +} + + +static size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_MAX_TABLELOG); + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX4 (DTable, cSrc, cSrcSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; + cSrcSize -= hSize; + + return HUF_decompress4X4_usingDTable (dst, dstSize, ip, cSrcSize, DTable); +} + + +/**********************************/ +/* quad-symbol decoding */ +/**********************************/ +typedef struct { BYTE nbBits; BYTE nbBytes; } HUF_DDescX6; +typedef union { BYTE byte[4]; U32 sequence; } HUF_DSeqX6; + +/* recursive, up to level 3; may benefit from